import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from itertools import combinations
from google.colab import data_table
#from vega_datasets import data
from google.colab import files
# Read-in LA-DOT API datasets, naming the columns [DateTime, <measurement>].
def _read_ladot(path, value_col):
    """Load a two-column LA-DOT CSV with our working column names."""
    return pd.read_csv(path, names=['DateTime', value_col])

current = _read_ladot('current.csv', 'Current')
voltage = _read_ladot('voltage.csv', 'Voltage')
power = _read_ladot('power.csv', 'Power')
speed = _read_ladot('speed.csv', 'Speed')
gps = _read_ladot('gps.csv', 'GPS')
odo = _read_ladot('odo.csv', 'Odometer')
# Read-in the GPS dataset chunks with elevation info (exported from
# gpsvisualizer.com) and keep only the 'altitude' column from each,
# excluding the other header information. The seven chunks are then
# concatenated into one continuous series. This replaces 21 copy-pasted
# read/select statements with a single loop over the chunk index.
gps_elev_parts = [
    pd.read_csv('gps{}_elev.txt'.format(i), sep=r'\s+')['altitude']
    for i in range(1, 8)
]
gps_concat = pd.concat(gps_elev_parts, ignore_index=True)
# Align the elevation series with the original LA-DOT GPS dataset by
# concatenating column-wise (axis=1).
gps_concat_elev = pd.concat([gps, gps_concat], axis=1)
# Rename 'altitude' to 'Elevation' to reflect that it is a measure of a
# place rather than an object.
gps_concat_elev.rename(columns={'altitude': 'Elevation'}, inplace=True)
# The raw GPS column is not needed for the initial analysis.
elev = gps_concat_elev.drop(columns='GPS')
# Read-in the climate/temperature dataset, keeping only the two columns we
# need, and rename them to our working names in one chained expression.
temp = pd.read_csv(
    'la_climate_aug.csv', header=0, usecols=['DATE', 'HourlyDryBulbTemperature']
).rename(columns={'DATE': 'DateTime', 'HourlyDryBulbTemperature': 'Temperature'})
Check for duplicates before converting the date-time feature into pandas Timestamp objects, and eventually into a DatetimeIndex.
In order to drop milliseconds, the date-time feature must first be converted into pandas Timestamp objects.
After converting date-time to Timestamp objects, drop the millisecond resolution.
This enables useful operations such as resampling/up-sampling if we decide to use them (resampling works only on a DatetimeIndex or other datetime-like index, not on plain Timestamp values).
asfreq() likewise applies only to a time-series index (DatetimeIndex), not to individual Timestamp objects.
# Convert each dataset's "DateTime" column into pd.Timestamp values.
def convert_to_dateTimeObj(*dataframe):
    """Parse every passed dataframe's 'DateTime' column into datetime64 values, in place."""
    for frame in dataframe:
        frame['DateTime'] = pd.to_datetime(frame['DateTime'])
# Parse every dataset's DateTime column in place.
convert_to_dateTimeObj(current, voltage, power, speed, elev, odo, temp)
# Count the number of duplicates in 'DateTime' timestamps in the datasets.
# Giving names to dataframes for convenience for the following function, count_duplicates().
# NOTE(review): the ad-hoc `.name` attribute is not preserved by pandas
# across copies/merges — confirm nothing downstream relies on it after the
# merge section.
voltage.name = 'Voltage'
current.name = 'Current'
power.name = 'Power'
speed.name = 'Speed'
elev.name = 'Elevation'
odo.name = 'Odometer'
temp.name = 'Temperature'
def count_duplicates(*dataframe):
    """Print, for each dataframe, how many 'DateTime' values are duplicated.

    The dataset label is taken from its value column (column 1) instead of
    the ad-hoc `.name` attribute, which pandas does not preserve across
    copies/operations and which the sibling helpers (count_NaNs,
    print_shape, get_datetime_range) do not rely on. Output is unchanged,
    since every dataset's assigned name equals its value-column name.
    """
    for frame in dataframe:
        label = frame.columns[1]
        print(label, "has", frame.duplicated(subset='DateTime').sum(), "duplicate timestamps.", '\n')
# Report duplicate-timestamp counts for all seven datasets.
count_duplicates(current, voltage, power, speed, elev, odo, temp)
Current has 0 duplicate timestamps. Voltage has 0 duplicate timestamps. Power has 0 duplicate timestamps. Speed has 0 duplicate timestamps. Elevation has 0 duplicate timestamps. Odometer has 0 duplicate timestamps. Temperature has 1 duplicate timestamps.
def count_NaNs(*dataframe):
    """Print per-column NaN counts for each dataframe.

    Generalized to any number of columns: the original handled exactly two
    and carried commented-out code for a hypothetical third. The dataset
    label is its value column (column 1), matching the other diagnostic
    helpers; output for the existing two-column datasets is unchanged.
    """
    for frame in dataframe:
        label = frame.columns[1]
        for col in frame.columns:
            print("Count of NaNs for " + label + "-" + col + ": ", frame[col].isnull().sum())
        print('\n')
# Report per-column NaN counts for every dataset.
count_NaNs(current, voltage, power, speed, elev, odo, temp)
Count of NaNs for Current-DateTime: 0 Count of NaNs for Current-Current: 0 Count of NaNs for Voltage-DateTime: 0 Count of NaNs for Voltage-Voltage: 0 Count of NaNs for Power-DateTime: 0 Count of NaNs for Power-Power: 0 Count of NaNs for Speed-DateTime: 0 Count of NaNs for Speed-Speed: 0 Count of NaNs for Elevation-DateTime: 0 Count of NaNs for Elevation-Elevation: 0 Count of NaNs for Odometer-DateTime: 0 Count of NaNs for Odometer-Odometer: 0 Count of NaNs for Temperature-DateTime: 0 Count of NaNs for Temperature-Temperature: 32
def print_shape(*dataframe):
    """Report the (rows, columns) shape of every dataframe passed in."""
    for frame in dataframe:
        print(frame.columns[1], 'shape: ', frame.shape)
# Report the shape of every dataset.
print_shape(current, voltage, power, speed, elev, odo, temp)
Current shape: (742082, 2) Voltage shape: (258381, 2) Power shape: (742000, 2) Speed shape: (289669, 2) Elevation shape: (624301, 2) Odometer shape: (42118, 2) Temperature shape: (853, 2)
def get_datetime_range(*dataframe):
    """Print the earliest/latest 'DateTime' of each dataframe and the span between them."""
    for frame in dataframe:
        label = frame.columns[1]
        earliest = frame.DateTime.min()
        latest = frame.DateTime.max()
        print(label, 'minimum date: ', earliest)
        print(label, 'maximum date: ', latest)
        print(label, 'date range: ', latest - earliest, '\n')
# Report the observed timestamp range for every dataset.
get_datetime_range(current, voltage, power, speed, elev, odo, temp)
Current minimum date: 2020-08-01 12:09:44.499000 Current maximum date: 2020-08-30 12:59:59.645000 Current date range: 29 days 00:50:15.146000 Voltage minimum date: 2020-08-01 12:09:44.499000 Voltage maximum date: 2020-08-30 12:57:19.642000 Voltage date range: 29 days 00:47:35.143000 Power minimum date: 2020-08-01 12:09:44.499000 Power maximum date: 2020-08-30 12:59:59.645000 Power date range: 29 days 00:50:15.146000 Speed minimum date: 2020-08-01 12:20:28.183000 Speed maximum date: 2020-08-30 12:54:22.646000 Speed date range: 29 days 00:33:54.463000 Elevation minimum date: 2020-08-01 12:07:42.929000 Elevation maximum date: 2020-08-30 12:59:58.734000 Elevation date range: 29 days 00:52:15.805000 Odometer minimum date: 2020-08-01 12:22:08.677000 Odometer maximum date: 2020-08-30 12:54:18.392000 Odometer date range: 29 days 00:32:09.715000 Temperature minimum date: 2020-08-01 00:52:00 Temperature maximum date: 2020-08-31 23:59:00 Temperature date range: 30 days 23:07:00
"""
For reference:
Current shape: (742082, 2)
Voltage shape: (258381, 2)
Power shape: (742000, 2)
Speed shape: (289669, 2)
Elevation shape: (624301, 2)
Odometer shape: (42118, 2)
Temperature shape: (853, 2)
"""
# Math - divide the span of timestamps by the number of rows to get the
# average sampling interval. Arithmetic works because the dates are pandas
# Timestamp objects.
def time_interval(dataframe, num_of_rows=None):
    """Return the average time between samples in `dataframe`.

    Parameters
    ----------
    dataframe : DataFrame with a datetime 'DateTime' column.
    num_of_rows : int, optional
        Divisor for the date range. Defaults to the dataframe's own row
        count, so callers no longer need to hard-code dataset sizes
        (passing the count explicitly still works as before).
    """
    if num_of_rows is None:
        num_of_rows = len(dataframe)
    date_range = dataframe.DateTime.max() - dataframe.DateTime.min()
    return date_range / num_of_rows
# Report the average sampling interval for every dataset (row counts taken
# from the shapes listed above).
for _label, _frame, _rows in [
    ('Current', current, 742082),
    ('Voltage', voltage, 258381),
    ('Power', power, 742000),
    ('Speed', speed, 289669),
    ('Elevation', elev, 624301),
    ('Odometer', odo, 42118),
    ('Temperature', temp, 853),
]:
    print(_label + " dataset average time interval: ", time_interval(_frame, _rows))
Current dataset average time interval: 0 days 00:00:03.380509358 Voltage dataset average time interval: 0 days 00:00:09.708357592 Power dataset average time interval: 0 days 00:00:03.380882946 Speed dataset average time interval: 0 days 00:00:08.656896191 Elevation dataset average time interval: 0 days 00:00:04.018471546 Odometer dataset average time interval: 0 days 00:00:59.535821145 Temperature dataset average time interval: 0 days 00:52:16.248534583
# Count how many timestamps every pair of datasets shares (exact matches).
from itertools import combinations
all_dataSets = [current, voltage, power, speed, elev, odo, temp]
for left, right in combinations(all_dataSets, 2):
    shared = left['DateTime'].isin(right['DateTime']).sum()
    print(left.columns[1], ' matches ', right.columns[1], shared, ' times.')
Current matches Voltage 257915 times. Current matches Power 741729 times. Current matches Speed 0 times. Current matches Elevation 416 times. Current matches Odometer 0 times. Current matches Temperature 1 times. Voltage matches Power 257831 times. Voltage matches Speed 0 times. Voltage matches Elevation 231 times. Voltage matches Odometer 0 times. Voltage matches Temperature 1 times. Power matches Speed 0 times. Power matches Elevation 416 times. Power matches Odometer 0 times. Power matches Temperature 1 times. Speed matches Elevation 293 times. Speed matches Odometer 16 times. Speed matches Temperature 0 times. Elevation matches Odometer 21 times. Elevation matches Temperature 1 times. Odometer matches Temperature 0 times.
'''
index = ['Current', 'Voltage', 'Power', 'Speed', 'Elevation', 'Odometer']
columns = ['Voltage', 'Power', 'Speed', 'Elevation', 'Odometer', 'Temperature']
data = [257915, 741729, 0, 416, 0, 1, 257831, 0, 231, 0, 1, 0, 416, 0, 1, 293, 16, 0, 21, 1, 0]
match_matrix = pd.DataFrame(data, index=index, columns=columns)
'''
"\nindex = ['Current', 'Voltage', 'Power', 'Speed', 'Elevation', 'Odometer']\ncolumns = ['Voltage', 'Power', 'Speed', 'Elevation', 'Odometer', 'Temperature']\ndata = [257915, 741729, 0, 416, 0, 1, 257831, 0, 231, 0, 1, 0, 416, 0, 1, 293, 16, 0, 21, 1, 0]\n\nmatch_matrix = pd.DataFrame(data, index=index, columns=columns)\n"
# Build the dataset summary table in one pass. This replaces ~50 hand-copied
# statements (seven variables per metric) and, crucially, derives each
# dataset's row count from its actual shape instead of hard-coded constants
# that would silently desync if the data changed. Values are identical to
# the original (the hard-coded counts matched the shapes listed above).
_frames = [current, voltage, power, speed, elev, odo, temp]
d = {
    # Duplicate 'DateTime' values per dataset.
    'Count Duplicate Timestamp': [f.duplicated(subset='DateTime').sum() for f in _frames],
    # Per-column NaN counts (stored as a Series per dataset, as before).
    'Count NaNs': [f.isnull().sum() for f in _frames],
    'Shape(row column)': [f.shape for f in _frames],
    'Min. Date': [f.DateTime.min() for f in _frames],
    'Max. Date': [f.DateTime.max() for f in _frames],
    'Date Range': [f.DateTime.max() - f.DateTime.min() for f in _frames],
    # Average sampling interval = timestamp span / row count.
    'Avg. Time Interval': [(f.DateTime.max() - f.DateTime.min()) / f.shape[0] for f in _frames],
}
summary_table = pd.DataFrame(d, index=['Current', 'Voltage', 'Power', 'Speed', 'Elevation', 'Odometer', 'Temperature'])
summary_table
| Count Duplicate Timestamp | Count NaNs | Shape(row column) | Min. Date | Max. Date | Date Range | Avg. Time Interval | |
|---|---|---|---|---|---|---|---|
| Current | 0 | DateTime 0 Current 0 dtype: int64 | (742082, 2) | 2020-08-01 12:09:44.499 | 2020-08-30 12:59:59.645 | 29 days 00:50:15.146000 | 0 days 00:00:03.380509358 |
| Voltage | 0 | DateTime 0 Voltage 0 dtype: int64 | (258381, 2) | 2020-08-01 12:09:44.499 | 2020-08-30 12:57:19.642 | 29 days 00:47:35.143000 | 0 days 00:00:09.708357592 |
| Power | 0 | DateTime 0 Power 0 dtype: int64 | (742000, 2) | 2020-08-01 12:09:44.499 | 2020-08-30 12:59:59.645 | 29 days 00:50:15.146000 | 0 days 00:00:03.380882946 |
| Speed | 0 | DateTime 0 Speed 0 dtype: int64 | (289669, 2) | 2020-08-01 12:20:28.183 | 2020-08-30 12:54:22.646 | 29 days 00:33:54.463000 | 0 days 00:00:08.656896191 |
| Elevation | 0 | DateTime 0 Elevation 0 dtype: int64 | (624301, 2) | 2020-08-01 12:07:42.929 | 2020-08-30 12:59:58.734 | 29 days 00:52:15.805000 | 0 days 00:00:04.018471546 |
| Odometer | 0 | DateTime 0 Odometer 0 dtype: int64 | (42118, 2) | 2020-08-01 12:22:08.677 | 2020-08-30 12:54:18.392 | 29 days 00:32:09.715000 | 0 days 00:00:59.535821145 |
| Temperature | 1 | DateTime 0 Temperature 32 dtype: int64 | (853, 2) | 2020-08-01 00:52:00.000 | 2020-08-31 23:59:00.000 | 30 days 23:07:00 | 0 days 00:52:16.248534583 |
# Output table to html for website. `with` guarantees the file handle is
# closed even if the write raises (the original left the handle open on error).
with open("raw_summary_table.html", "w") as html_file:
    html_file.write(summary_table.to_html())
# Decrease the time resolution by rounding timestamps to whole seconds.
# (Note: round('S') rounds to the nearest second; it does not simply
# truncate the milliseconds.)
def drop_ms(*dataframe):
    """Round each dataframe's 'DateTime' column to second resolution, in place."""
    for frame in dataframe:
        frame['DateTime'] = frame['DateTime'].round('S')
# Round every dataset's timestamps to whole seconds.
drop_ms(current, voltage, power, speed, elev, odo, temp)
# NaNs for temp is dispersed throughout the dataset, i.e. it is not contiguous, therefore is a good
# candidate for interpolation to fill in the NaNs.
# Display the temperature frame to inspect where the NaNs sit (notebook cell output).
temp
| DateTime | Temperature | |
|---|---|---|
| 0 | 2020-08-01 00:52:00 | 66.0 |
| 1 | 2020-08-01 01:52:00 | 64.0 |
| 2 | 2020-08-01 02:52:00 | 63.0 |
| 3 | 2020-08-01 03:52:00 | 64.0 |
| 4 | 2020-08-01 04:52:00 | 62.0 |
| ... | ... | ... |
| 848 | 2020-08-31 21:52:00 | 66.0 |
| 849 | 2020-08-31 22:52:00 | 66.0 |
| 850 | 2020-08-31 23:52:00 | 66.0 |
| 851 | 2020-08-31 23:59:00 | NaN |
| 852 | 2020-08-31 23:59:00 | NaN |
853 rows × 2 columns
# Fill-in temperature NaNs using linear interpolation.
import pytz  # retained from earlier timezone experiments; not currently used
# Interpolating the whole frame (temp.interpolate(..., inplace=True)) began
# raising a timezone-related error on the DateTime column in newer pandas,
# so only the Temperature series is interpolated. The result is assigned
# back instead of using chained inplace=True, which operates on a column
# view and is deprecated/unreliable in modern pandas.
# This deferred interpolation will matter again during merging, as noted in
# the 'Merge' section.
temp['Temperature'] = temp['Temperature'].interpolate(method='linear')
temp
temp
| DateTime | Temperature | |
|---|---|---|
| 0 | 2020-08-01 00:52:00 | 66.0 |
| 1 | 2020-08-01 01:52:00 | 64.0 |
| 2 | 2020-08-01 02:52:00 | 63.0 |
| 3 | 2020-08-01 03:52:00 | 64.0 |
| 4 | 2020-08-01 04:52:00 | 62.0 |
| ... | ... | ... |
| 848 | 2020-08-31 21:52:00 | 66.0 |
| 849 | 2020-08-31 22:52:00 | 66.0 |
| 850 | 2020-08-31 23:52:00 | 66.0 |
| 851 | 2020-08-31 23:59:00 | 66.0 |
| 852 | 2020-08-31 23:59:00 | 66.0 |
853 rows × 2 columns
# After the millisecond drop and filling-in the temperature NaNs, recompute
# the same per-dataset diagnostics. Built in one pass instead of seven
# hand-copied variable sets per metric; row counts come from each frame's
# actual shape rather than hard-coded constants (identical values, since
# dropping milliseconds removes no rows).
_frames_after = [current, voltage, power, speed, elev, odo, temp]
d_d = {
    'Count Duplicate Timestamp': [f.duplicated(subset='DateTime').sum() for f in _frames_after],
    'Count NaNs': [f.isnull().sum() for f in _frames_after],
    'Shape(row column)': [f.shape for f in _frames_after],
    'Min. Date': [f.DateTime.min() for f in _frames_after],
    'Max. Date': [f.DateTime.max() for f in _frames_after],
    'Date Range': [f.DateTime.max() - f.DateTime.min() for f in _frames_after],
    # Average sampling interval = timestamp span / row count.
    'Avg. Time Interval': [(f.DateTime.max() - f.DateTime.min()) / f.shape[0] for f in _frames_after],
}
summary_table_afterdrop = pd.DataFrame(d_d, index=['Current', 'Voltage', 'Power', 'Speed', 'Elevation', 'Odometer', 'Temperature'])
summary_table_afterdrop
| Count Duplicate Timestamp | Count NaNs | Shape(row column) | Min. Date | Max. Date | Date Range | Avg. Time Interval | |
|---|---|---|---|---|---|---|---|
| Current | 541 | DateTime 0 Current 0 dtype: int64 | (742082, 2) | 2020-08-01 12:09:44 | 2020-08-30 13:00:00 | 29 days 00:50:16 | 0 days 00:00:03.380510509 |
| Voltage | 227 | DateTime 0 Voltage 0 dtype: int64 | (258381, 2) | 2020-08-01 12:09:44 | 2020-08-30 12:57:20 | 29 days 00:47:36 | 0 days 00:00:09.708360908 |
| Power | 461 | DateTime 0 Power 0 dtype: int64 | (742000, 2) | 2020-08-01 12:09:44 | 2020-08-30 13:00:00 | 29 days 00:50:16 | 0 days 00:00:03.380884097 |
| Speed | 128 | DateTime 0 Speed 0 dtype: int64 | (289669, 2) | 2020-08-01 12:20:28 | 2020-08-30 12:54:23 | 29 days 00:33:55 | 0 days 00:00:08.656898045 |
| Elevation | 1 | DateTime 0 Elevation 0 dtype: int64 | (624301, 2) | 2020-08-01 12:07:43 | 2020-08-30 12:59:59 | 29 days 00:52:16 | 0 days 00:00:04.018471858 |
| Odometer | 7 | DateTime 0 Odometer 0 dtype: int64 | (42118, 2) | 2020-08-01 12:22:09 | 2020-08-30 12:54:18 | 29 days 00:32:09 | 0 days 00:00:59.535804169 |
| Temperature | 1 | DateTime 0 Temperature 0 dtype: int64 | (853, 2) | 2020-08-01 00:52:00 | 2020-08-31 23:59:00 | 30 days 23:07:00 | 0 days 00:52:16.248534583 |
# Output table to html for website. `with` guarantees the file handle is
# closed even if the write raises (the original left the handle open on error).
with open("raw_summary_table_afterdrop.html", "w") as html_file:
    html_file.write(summary_table_afterdrop.to_html())
# Count matching timestamps among the datasets after dropping milliseconds.
from itertools import combinations
all_dataSets = [current, voltage, power, speed, elev, odo, temp]
for left, right in combinations(all_dataSets, 2):
    shared = left['DateTime'].isin(right['DateTime']).sum()
    print(left.columns[1], ' matches ', right.columns[1], shared, ' times.')
Current matches Voltage 257996 times. Current matches Power 741810 times. Current matches Speed 288070 times. Current matches Elevation 422461 times. Current matches Odometer 41987 times. Current matches Temperature 227 times. Voltage matches Power 257926 times. Voltage matches Speed 200240 times. Voltage matches Elevation 229165 times. Voltage matches Odometer 30535 times. Voltage matches Temperature 82 times. Power matches Speed 288017 times. Power matches Elevation 422401 times. Power matches Odometer 41976 times. Power matches Temperature 227 times. Speed matches Elevation 272538 times. Speed matches Odometer 33487 times. Speed matches Temperature 89 times. Elevation matches Odometer 41221 times. Elevation matches Temperature 186 times. Odometer matches Temperature 15 times.
# Drop duplicate timestamps from each dataset, keeping the first occurrence.
def my_drop_duplicates(*dataframe):
    """Remove rows whose 'DateTime' value already appeared earlier, in place."""
    for frame in dataframe:
        frame.drop_duplicates(subset='DateTime', inplace=True)
# De-duplicate timestamps in every dataset.
my_drop_duplicates(current, voltage, power, speed, elev, odo, temp)
# Count matching timestamps among the datasets AFTER dropping the duplicates.
from itertools import combinations
all_dataSets = [current, voltage, power, speed, elev, odo, temp]
for left, right in combinations(all_dataSets, 2):
    shared = left['DateTime'].isin(right['DateTime']).sum()
    print(left.columns[1], ' matches ', right.columns[1], shared, ' times.')
Current matches Voltage 257707 times. Current matches Power 741272 times. Current matches Speed 287872 times. Current matches Elevation 422164 times. Current matches Odometer 41958 times. Current matches Temperature 227 times. Voltage matches Power 257702 times. Voltage matches Speed 200132 times. Voltage matches Elevation 229026 times. Voltage matches Odometer 30519 times. Voltage matches Temperature 82 times. Power matches Speed 287819 times. Power matches Elevation 422119 times. Power matches Odometer 41947 times. Power matches Temperature 227 times. Speed matches Elevation 272418 times. Speed matches Odometer 33470 times. Speed matches Temperature 88 times. Elevation matches Odometer 41221 times. Elevation matches Temperature 186 times. Odometer matches Temperature 15 times.
# TODO: code for combinatoric matrix for above output. Fix code below.
'''
#combo_count = pd.DataFrame()
for i,j in comb:
count = i['DateTime'].isin(j['DateTime']).sum()
#print(i.columns[1], ' matches ', j.columns[1], count, ' times.')
index+=([i.columns[1]])
column+=([j.columns[1]])
combo_count = pd.DataFrame.from_records(index=index, columns=column, data=count)
'''
"\n#combo_count = pd.DataFrame()\nfor i,j in comb:\n count = i['DateTime'].isin(j['DateTime']).sum()\n #print(i.columns[1], ' matches ', j.columns[1], count, ' times.')\n index+=([i.columns[1]])\n column+=([j.columns[1]])\n combo_count = pd.DataFrame.from_records(index=index, columns=column, data=count)\n"
Note: interpolation was deferred until after the merge was completed for some of the datasets, due to an error message that did not occur initially when the merge and interpolation were done together in one line of code (now commented out) for the whole dataframe. Instead, interpolation was applied to the individual series of interest.
"""Preview of merge before and after dropping duplicates."""
""" Before dropping duplicates.
Current matches Voltage 257996 times.
Current matches Power 741810 times.
Current matches Speed 288070 times.
Current matches Elevation 422461 times.
Current matches Odometer 41987 times.
Current matches Temperature 227 times.
Voltage matches Power 257926 times.
Voltage matches Speed 200240 times.
Voltage matches Elevation 229165 times.
Voltage matches Odometer 30535 times.
Voltage matches Temperature 82 times.
Power matches Speed 288017 times.
Power matches Elevation 422401 times.
Power matches Odometer 41976 times.
Power matches Temperature 227 times.
Speed matches Elevation 272538 times.
Speed matches Odometer 33487 times.
Speed matches Temperature 89 times.
Elevation matches Odometer 41221 times.
Elevation matches Temperature 186 times.
Odometer matches Temperature 15 times.
"""
"""After dropping duplicates.
Current matches Voltage 257707 times.
Current matches Power 741272 times.
Current matches Speed 287872 times.
Current matches Elevation 422164 times.
Current matches Odometer 41958 times.
Current matches Temperature 227 times.
Voltage matches Power 257702 times.
Voltage matches Speed 200132 times.
Voltage matches Elevation 229026 times.
Voltage matches Odometer 30519 times.
Voltage matches Temperature 82 times.
Power matches Speed 287819 times.
Power matches Elevation 422119 times.
Power matches Odometer 41947 times.
Power matches Temperature 227 times.
Speed matches Elevation 272418 times.
Speed matches Odometer 33470 times.
Speed matches Temperature 88 times.
Elevation matches Odometer 41221 times.
Elevation matches Temperature 186 times.
Odometer matches Temperature 15 times.
"""
# Inner-merge current, power, voltage, and elevation on exact timestamps.
# (Index-based merges were tried earlier; timestamp joins align the readings
# correctly.)
cp = pd.merge(current, power, on='DateTime')
cpv = pd.merge(cp, voltage, on='DateTime')
cpve = pd.merge(cpv, elev, on='DateTime')
cpve
| DateTime | Current | Power | Voltage | Elevation | |
|---|---|---|---|---|---|
| 0 | 2020-08-01 12:10:15 | 8.3 | 4.3575 | 525 | 68.5 |
| 1 | 2020-08-01 12:10:16 | 8.3 | 4.3492 | 524 | 68.4 |
| 2 | 2020-08-01 12:10:47 | 9.7 | 5.0828 | 524 | 68.7 |
| 3 | 2020-08-01 12:10:48 | 9.5 | 4.9685 | 523 | 68.7 |
| 4 | 2020-08-01 12:11:41 | 9.5 | 4.9685 | 523 | 68.7 |
| ... | ... | ... | ... | ... | ... |
| 228828 | 2020-08-30 12:56:56 | 3.2 | 1.6512 | 516 | 34.3 |
| 228829 | 2020-08-30 12:56:57 | 3.2 | 1.6480 | 515 | 34.3 |
| 228830 | 2020-08-30 12:56:59 | 3.2 | 1.6480 | 515 | 34.3 |
| 228831 | 2020-08-30 12:57:00 | 3.1 | 1.5996 | 516 | 34.3 |
| 228832 | 2020-08-30 12:57:03 | 3.3 | 1.7028 | 516 | 34.3 |
228833 rows × 5 columns
# Inner-merge speed on exact timestamps.
cpves = pd.merge(cpve, speed, on='DateTime')
cpves
| DateTime | Current | Power | Voltage | Elevation | Speed | |
|---|---|---|---|---|---|---|
| 0 | 2020-08-01 12:20:30 | 15.8 | 8.1686 | 517 | 68.7 | 1 |
| 1 | 2020-08-01 12:20:36 | 22.1 | 11.4036 | 516 | 68.6 | 6 |
| 2 | 2020-08-01 12:20:37 | -3.3 | -1.7061 | 517 | 68.6 | 5 |
| 3 | 2020-08-01 12:22:04 | 49.4 | 25.4904 | 516 | 68.5 | 6 |
| 4 | 2020-08-01 12:22:10 | 25.8 | 13.2870 | 515 | 68.5 | 11 |
| ... | ... | ... | ... | ... | ... | ... |
| 190119 | 2020-08-30 12:54:10 | -81.8 | -42.1270 | 515 | 34.3 | 15 |
| 190120 | 2020-08-30 12:54:11 | -58.7 | -30.2892 | 516 | 34.3 | 11 |
| 190121 | 2020-08-30 12:54:12 | 0.2 | 0.1030 | 515 | 34.3 | 10 |
| 190122 | 2020-08-30 12:54:15 | -34.8 | -17.9220 | 515 | 34.3 | 9 |
| 190123 | 2020-08-30 12:54:16 | 15.4 | 7.9310 | 515 | 34.3 | 8 |
190124 rows × 6 columns
# Left-merge the odometer readings onto the combined frame so rows without
# an exact timestamp match survive, then linearly interpolate the resulting
# gaps in both directions. The interpolated series is assigned back instead
# of using chained inplace=True, which operates on a column view and is
# deprecated/unreliable in modern pandas.
cpveso = pd.merge(cpves, odo, on='DateTime', how='left')
cpveso['Odometer'] = cpveso['Odometer'].interpolate(method='linear', limit_direction='both')
cpveso
| DateTime | Current | Power | Voltage | Elevation | Speed | Odometer | |
|---|---|---|---|---|---|---|---|
| 0 | 2020-08-01 12:20:30 | 15.8 | 8.1686 | 517 | 68.7 | 1 | 136973.7 |
| 1 | 2020-08-01 12:20:36 | 22.1 | 11.4036 | 516 | 68.6 | 6 | 136973.7 |
| 2 | 2020-08-01 12:20:37 | -3.3 | -1.7061 | 517 | 68.6 | 5 | 136973.7 |
| 3 | 2020-08-01 12:22:04 | 49.4 | 25.4904 | 516 | 68.5 | 6 | 136973.7 |
| 4 | 2020-08-01 12:22:10 | 25.8 | 13.2870 | 515 | 68.5 | 11 | 136973.7 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 190119 | 2020-08-30 12:54:10 | -81.8 | -42.1270 | 515 | 34.3 | 15 | 139132.3 |
| 190120 | 2020-08-30 12:54:11 | -58.7 | -30.2892 | 516 | 34.3 | 11 | 139132.3 |
| 190121 | 2020-08-30 12:54:12 | 0.2 | 0.1030 | 515 | 34.3 | 10 | 139132.3 |
| 190122 | 2020-08-30 12:54:15 | -34.8 | -17.9220 | 515 | 34.3 | 9 | 139132.3 |
| 190123 | 2020-08-30 12:54:16 | 15.4 | 7.9310 | 515 | 34.3 | 8 | 139132.3 |
190124 rows × 7 columns
# Left-merge the hourly temperature onto the combined frame, then linearly
# interpolate the gaps in both directions. The interpolated series is
# assigned back instead of using chained inplace=True, which operates on a
# column view and is deprecated/unreliable in modern pandas.
cpvesot = pd.merge(cpveso, temp, on='DateTime', how='left')
cpvesot['Temperature'] = cpvesot['Temperature'].interpolate(method='linear', limit_direction='both')
cpvesot
| DateTime | Current | Power | Voltage | Elevation | Speed | Odometer | Temperature | |
|---|---|---|---|---|---|---|---|---|
| 0 | 2020-08-01 12:20:30 | 15.8 | 8.1686 | 517 | 68.7 | 1 | 136973.7 | 87.0 |
| 1 | 2020-08-01 12:20:36 | 22.1 | 11.4036 | 516 | 68.6 | 6 | 136973.7 | 87.0 |
| 2 | 2020-08-01 12:20:37 | -3.3 | -1.7061 | 517 | 68.6 | 5 | 136973.7 | 87.0 |
| 3 | 2020-08-01 12:22:04 | 49.4 | 25.4904 | 516 | 68.5 | 6 | 136973.7 | 87.0 |
| 4 | 2020-08-01 12:22:10 | 25.8 | 13.2870 | 515 | 68.5 | 11 | 136973.7 | 87.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 190119 | 2020-08-30 12:54:10 | -81.8 | -42.1270 | 515 | 34.3 | 15 | 139132.3 | 75.0 |
| 190120 | 2020-08-30 12:54:11 | -58.7 | -30.2892 | 516 | 34.3 | 11 | 139132.3 | 75.0 |
| 190121 | 2020-08-30 12:54:12 | 0.2 | 0.1030 | 515 | 34.3 | 10 | 139132.3 | 75.0 |
| 190122 | 2020-08-30 12:54:15 | -34.8 | -17.9220 | 515 | 34.3 | 9 | 139132.3 | 75.0 |
| 190123 | 2020-08-30 12:54:16 | 15.4 | 7.9310 | 515 | 34.3 | 8 | 139132.3 | 75.0 |
190124 rows × 8 columns
# Reorder the columns into a consistent presentation order.
cpvesot = cpvesot[['DateTime', 'Current', 'Voltage', 'Power', 'Elevation', 'Speed', 'Odometer', 'Temperature']]
# Group by calendar day. The axis=0 argument to pd.Grouper is dropped: it
# was the default and is deprecated/removed in modern pandas.
grouped = cpvesot.groupby(pd.Grouper(key='DateTime', freq='D'))
# Count non-null values per feature per day to gauge daily activity.
count_ofactivity = grouped.count()
count_ofactivity
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | |
|---|---|---|---|---|---|---|---|
| DateTime | |||||||
| 2020-08-01 | 9534 | 9534 | 9534 | 9534 | 9534 | 9534 | 9534 |
| 2020-08-02 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-03 | 12850 | 12850 | 12850 | 12850 | 12850 | 12850 | 12850 |
| 2020-08-04 | 12530 | 12530 | 12530 | 12530 | 12530 | 12530 | 12530 |
| 2020-08-05 | 12649 | 12649 | 12649 | 12649 | 12649 | 12649 | 12649 |
| 2020-08-06 | 13765 | 13765 | 13765 | 13765 | 13765 | 13765 | 13765 |
| 2020-08-07 | 14199 | 14199 | 14199 | 14199 | 14199 | 14199 | 14199 |
| 2020-08-08 | 8716 | 8716 | 8716 | 8716 | 8716 | 8716 | 8716 |
| 2020-08-09 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-10 | 12930 | 12930 | 12930 | 12930 | 12930 | 12930 | 12930 |
| 2020-08-11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-13 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-15 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-18 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |
| 2020-08-19 | 3151 | 3151 | 3151 | 3151 | 3151 | 3151 | 3151 |
| 2020-08-20 | 7725 | 7725 | 7725 | 7725 | 7725 | 7725 | 7725 |
| 2020-08-21 | 12653 | 12653 | 12653 | 12653 | 12653 | 12653 | 12653 |
| 2020-08-22 | 11193 | 11193 | 11193 | 11193 | 11193 | 11193 | 11193 |
| 2020-08-23 | 5573 | 5573 | 5573 | 5573 | 5573 | 5573 | 5573 |
| 2020-08-24 | 12659 | 12659 | 12659 | 12659 | 12659 | 12659 | 12659 |
| 2020-08-25 | 11731 | 11731 | 11731 | 11731 | 11731 | 11731 | 11731 |
| 2020-08-26 | 29 | 29 | 29 | 29 | 29 | 29 | 29 |
| 2020-08-27 | 7165 | 7165 | 7165 | 7165 | 7165 | 7165 | 7165 |
| 2020-08-28 | 6859 | 6859 | 6859 | 6859 | 6859 | 6859 | 6859 |
| 2020-08-29 | 12675 | 12675 | 12675 | 12675 | 12675 | 12675 | 12675 |
| 2020-08-30 | 1535 | 1535 | 1535 | 1535 | 1535 | 1535 | 1535 |
# Output table to html for website. `with` guarantees the file handle is
# closed even if the write raises (the original left the handle open on error).
with open("count_of_month_activity.html", "w") as html_file:
    html_file.write(count_ofactivity.to_html())
# Create dataframe for dates 8/3 - 8/7.
# (If DateTime were the index, a label slice like cpvesot['08-03':'08-07']
# would work instead of boolean masking.)
# .copy() makes this an independent DataFrame rather than a view of cpvesot,
# which prevents the SettingWithCopyWarning on every later column assignment.
august_threeToseven = cpvesot[(cpvesot['DateTime'] > '2020-08-02') & (cpvesot['DateTime'] < '2020-08-08')].copy()
august_threeToseven
| DateTime | Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | |
|---|---|---|---|---|---|---|---|---|
| 9534 | 2020-08-03 08:56:49 | 25.8 | 515 | 13.2870 | 68.7 | 11 | 137084.466667 | 75.486327 |
| 9535 | 2020-08-03 08:56:50 | -5.2 | 516 | -2.6832 | 68.7 | 9 | 137084.500000 | 75.491500 |
| 9536 | 2020-08-03 08:56:53 | 1.8 | 515 | 0.9270 | 68.7 | 7 | 137084.502564 | 75.496674 |
| 9537 | 2020-08-03 08:56:58 | 27.0 | 514 | 13.8780 | 68.7 | 9 | 137084.505128 | 75.501848 |
| 9538 | 2020-08-03 08:57:02 | 14.4 | 515 | 7.4160 | 68.6 | 8 | 137084.507692 | 75.507021 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 75522 | 2020-08-07 22:03:51 | 35.6 | 493 | 17.5508 | 68.5 | 10 | 137825.756522 | 73.441026 |
| 75523 | 2020-08-07 22:03:52 | 36.6 | 492 | 18.0072 | 68.5 | 11 | 137825.765217 | 73.446154 |
| 75524 | 2020-08-07 22:03:53 | -29.2 | 494 | -14.4248 | 68.5 | 9 | 137825.773913 | 73.451282 |
| 75525 | 2020-08-07 22:03:54 | 18.5 | 494 | 9.1390 | 68.5 | 8 | 137825.782609 | 73.456410 |
| 75526 | 2020-08-07 22:03:59 | -21.9 | 494 | -10.8186 | 68.4 | 8 | 137825.791304 | 73.461538 |
65993 rows × 8 columns
# Sanity check: confirm the merged 8/3-8/7 slice has no missing values
# before deriving new columns from it.
august_threeToseven.isnull().sum()
DateTime 0 Current 0 Voltage 0 Power 0 Elevation 0 Speed 0 Odometer 0 Temperature 0 dtype: int64
# For some reason, after merge and/or grouping, precision increased. Therefore we are rounding it down again here.
# Use .assign() (which returns a new frame) instead of mutating columns in
# place: in-place assignment on a frame that may be a view of another frame
# triggers pandas' SettingWithCopyWarning.
august_threeToseven = august_threeToseven.assign(
    Temperature=august_threeToseven['Temperature'].round(decimals=0),
    Odometer=august_threeToseven['Odometer'].round(decimals=1),
)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy This is separate from the ipykernel package so we can avoid doing imports until /usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy after removing the cwd from sys.path.
# Output table to html for website.
html = august_threeToseven.to_html(max_rows=20)
# Write html to file. A context manager guarantees the handle is closed even
# on error, and avoids shadowing the builtin name `file`.
with open("august_threeToseven_merged.html", "w") as out:
    out.write(html)
# Acceleration (m/s^2) = delta speed / delta time:
#   Speed is km/h, so multiply by 0.2777777778 (= 1000/3600) to convert to m/s;
#   DateTime.diff().dt.total_seconds() gives the elapsed seconds per sample.
# The first row has no predecessor, so diff() leaves it NaN (filled later).
august_threeToseven['Acceleration'] = (
    august_threeToseven['Speed'].diff() * 0.2777777778
    / august_threeToseven['DateTime'].diff().dt.total_seconds()
)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
august_threeToseven
| DateTime | Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | |
|---|---|---|---|---|---|---|---|---|---|
| 9534 | 2020-08-03 08:56:49 | 25.8 | 515 | 13.2870 | 68.7 | 11 | 137084.5 | 75.0 | NaN |
| 9535 | 2020-08-03 08:56:50 | -5.2 | 516 | -2.6832 | 68.7 | 9 | 137084.5 | 75.0 | -0.555556 |
| 9536 | 2020-08-03 08:56:53 | 1.8 | 515 | 0.9270 | 68.7 | 7 | 137084.5 | 75.0 | -0.185185 |
| 9537 | 2020-08-03 08:56:58 | 27.0 | 514 | 13.8780 | 68.7 | 9 | 137084.5 | 76.0 | 0.111111 |
| 9538 | 2020-08-03 08:57:02 | 14.4 | 515 | 7.4160 | 68.6 | 8 | 137084.5 | 76.0 | -0.069444 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 75522 | 2020-08-07 22:03:51 | 35.6 | 493 | 17.5508 | 68.5 | 10 | 137825.8 | 73.0 | 0.055556 |
| 75523 | 2020-08-07 22:03:52 | 36.6 | 492 | 18.0072 | 68.5 | 11 | 137825.8 | 73.0 | 0.277778 |
| 75524 | 2020-08-07 22:03:53 | -29.2 | 494 | -14.4248 | 68.5 | 9 | 137825.8 | 73.0 | -0.555556 |
| 75525 | 2020-08-07 22:03:54 | 18.5 | 494 | 9.1390 | 68.5 | 8 | 137825.8 | 73.0 | -0.277778 |
| 75526 | 2020-08-07 22:03:59 | -21.9 | 494 | -10.8186 | 68.4 | 8 | 137825.8 | 73.0 | 0.000000 |
65993 rows × 9 columns
# Fill-in any NaNs from 'Acceleration' with 0.
# (diff() left the first row NaN; treat the starting acceleration as zero.)
august_threeToseven['Acceleration'] = august_threeToseven['Acceleration'].fillna(0)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy This is separate from the ipykernel package so we can avoid doing imports until
august_threeToseven
| DateTime | Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | |
|---|---|---|---|---|---|---|---|---|---|
| 9534 | 2020-08-03 08:56:49 | 25.8 | 515 | 13.2870 | 68.7 | 11 | 137084.5 | 75.0 | 0.000000 |
| 9535 | 2020-08-03 08:56:50 | -5.2 | 516 | -2.6832 | 68.7 | 9 | 137084.5 | 75.0 | -0.555556 |
| 9536 | 2020-08-03 08:56:53 | 1.8 | 515 | 0.9270 | 68.7 | 7 | 137084.5 | 75.0 | -0.185185 |
| 9537 | 2020-08-03 08:56:58 | 27.0 | 514 | 13.8780 | 68.7 | 9 | 137084.5 | 76.0 | 0.111111 |
| 9538 | 2020-08-03 08:57:02 | 14.4 | 515 | 7.4160 | 68.6 | 8 | 137084.5 | 76.0 | -0.069444 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 75522 | 2020-08-07 22:03:51 | 35.6 | 493 | 17.5508 | 68.5 | 10 | 137825.8 | 73.0 | 0.055556 |
| 75523 | 2020-08-07 22:03:52 | 36.6 | 492 | 18.0072 | 68.5 | 11 | 137825.8 | 73.0 | 0.277778 |
| 75524 | 2020-08-07 22:03:53 | -29.2 | 494 | -14.4248 | 68.5 | 9 | 137825.8 | 73.0 | -0.555556 |
| 75525 | 2020-08-07 22:03:54 | 18.5 | 494 | 9.1390 | 68.5 | 8 | 137825.8 | 73.0 | -0.277778 |
| 75526 | 2020-08-07 22:03:59 | -21.9 | 494 | -10.8186 | 68.4 | 8 | 137825.8 | 73.0 | 0.000000 |
65993 rows × 9 columns
# Output table to html for website.
html = august_threeToseven.to_html(max_rows=20)
# Write html to file. A context manager guarantees the handle is closed even
# on error, and avoids shadowing the builtin name `file`.
with open("august_threeToseven_merged_acc.html", "w") as out:
    out.write(html)
# Grade (%) over half-mile odometer segments: rise (Elevation scaled by .001)
# divided by run (Odometer delta), times 100.
# NOTE(review): Elevation*.001 presumably puts rise in the same unit as the
# odometer run -- TODO confirm the units (elevation vs odometer).
august_threeToseven['Grade'] = np.nan
# Walk consecutive index labels; whenever the odometer has advanced exactly
# 0.5 (the grade distance marker, e.g. 1/2 mile) since the previous marker,
# record the grade for that segment and move the marker forward.
# The exact `== .5` test relies on Odometer being rounded to one decimal
# earlier in the notebook.
#
# Iterating the index itself fixes two defects of the earlier counter loop:
#  - the old bound `while count_2 < i` compared an index LABEL against the
#    row COUNT (len), so rows labelled beyond len(df) were never processed;
#  - the start label (9534) was hard-coded.
marker = august_threeToseven.index[0]
for label in august_threeToseven.index[1:]:
    run = august_threeToseven['Odometer'][label] - august_threeToseven['Odometer'][marker]
    if run == .5:
        rise = (august_threeToseven['Elevation'][label] - august_threeToseven['Elevation'][marker]) * .001
        # .loc assignment avoids chained indexing (SettingWithCopyWarning).
        august_threeToseven.loc[label, 'Grade'] = (rise / run) * 100
        marker = label
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy """Entry point for launching an IPython kernel. /usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:28: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
august_threeToseven
| DateTime | Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | |
|---|---|---|---|---|---|---|---|---|---|---|
| 9534 | 2020-08-03 08:56:49 | 25.8 | 515 | 13.2870 | 68.7 | 11 | 137084.5 | 75.0 | 0.000000 | NaN |
| 9535 | 2020-08-03 08:56:50 | -5.2 | 516 | -2.6832 | 68.7 | 9 | 137084.5 | 75.0 | -0.555556 | NaN |
| 9536 | 2020-08-03 08:56:53 | 1.8 | 515 | 0.9270 | 68.7 | 7 | 137084.5 | 75.0 | -0.185185 | NaN |
| 9537 | 2020-08-03 08:56:58 | 27.0 | 514 | 13.8780 | 68.7 | 9 | 137084.5 | 76.0 | 0.111111 | NaN |
| 9538 | 2020-08-03 08:57:02 | 14.4 | 515 | 7.4160 | 68.6 | 8 | 137084.5 | 76.0 | -0.069444 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 75522 | 2020-08-07 22:03:51 | 35.6 | 493 | 17.5508 | 68.5 | 10 | 137825.8 | 73.0 | 0.055556 | NaN |
| 75523 | 2020-08-07 22:03:52 | 36.6 | 492 | 18.0072 | 68.5 | 11 | 137825.8 | 73.0 | 0.277778 | NaN |
| 75524 | 2020-08-07 22:03:53 | -29.2 | 494 | -14.4248 | 68.5 | 9 | 137825.8 | 73.0 | -0.555556 | NaN |
| 75525 | 2020-08-07 22:03:54 | 18.5 | 494 | 9.1390 | 68.5 | 8 | 137825.8 | 73.0 | -0.277778 | NaN |
| 75526 | 2020-08-07 22:03:59 | -21.9 | 494 | -10.8186 | 68.4 | 8 | 137825.8 | 73.0 | 0.000000 | NaN |
65993 rows × 10 columns
# NaNs created after extrapolating grade every mile (or half a mile depending on what we want).
# Count them before filling: only the rows at a distance marker received a
# Grade value; everything in between is still NaN.
august_threeToseven['Grade'].isnull().sum()
65350
# Fill in NaNs backwards from the mile marker.
# Reassigning .bfill() output replaces fillna(method='bfill', inplace=True):
# the in-place form mutates a slice (SettingWithCopyWarning) and the method=
# keyword is deprecated in modern pandas.
august_threeToseven['Grade'] = august_threeToseven['Grade'].bfill()
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py:6392: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy return self._update_inplace(result)
# Fill in NaNs forward for the last few lines of the dataset. (we are assuming continuation of the same grade towards the end of the dataset).
# Reassigning .ffill() output replaces the deprecated, slice-mutating
# fillna(method='ffill', inplace=True) form.
august_threeToseven['Grade'] = august_threeToseven['Grade'].ffill()
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py:6392: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy return self._update_inplace(result)
# Extrapolate angle associated with grade. (may not need it).
# Angle = arctan(rise / run) per consecutive sample; Odometer*.3048 presumably
# converts the run into the same unit as Elevation -- TODO confirm units.
# NOTE(review): where the odometer does not change between samples, run == 0,
# so the ratio is +/-inf and arctan yields +/-pi/2 (the -1.570796 values seen
# in the output below); consider masking zero-run rows before trusting Angle.
august_threeToseven['Angle'] = np.arctan(august_threeToseven['Elevation'].diff()/(august_threeToseven['Odometer']*.3048).diff())
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy This is separate from the ipykernel package so we can avoid doing imports until
# Fill in NaNs as before.
# Reassigning .bfill() output replaces the deprecated, slice-mutating
# fillna(method='bfill', inplace=True) form.
august_threeToseven['Angle'] = august_threeToseven['Angle'].bfill()
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py:6392: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy return self._update_inplace(result)
# Fill in NaNs as before.
# Reassigning .ffill() output replaces the deprecated, slice-mutating
# fillna(method='ffill', inplace=True) form.
august_threeToseven['Angle'] = august_threeToseven['Angle'].ffill()
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py:6392: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy return self._update_inplace(result)
# Dataset for 8/3-8/7 date range after merge operation. Acceleration, grade and angle extrapolated with NaNs filled.
august_threeToseven
| DateTime | Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 9534 | 2020-08-03 08:56:49 | 25.8 | 515 | 13.2870 | 68.7 | 11 | 137084.5 | 75.0 | 0.000000 | 0.16 | -1.570796 |
| 9535 | 2020-08-03 08:56:50 | -5.2 | 516 | -2.6832 | 68.7 | 9 | 137084.5 | 75.0 | -0.555556 | 0.16 | -1.570796 |
| 9536 | 2020-08-03 08:56:53 | 1.8 | 515 | 0.9270 | 68.7 | 7 | 137084.5 | 75.0 | -0.185185 | 0.16 | -1.570796 |
| 9537 | 2020-08-03 08:56:58 | 27.0 | 514 | 13.8780 | 68.7 | 9 | 137084.5 | 76.0 | 0.111111 | 0.16 | -1.570796 |
| 9538 | 2020-08-03 08:57:02 | 14.4 | 515 | 7.4160 | 68.6 | 8 | 137084.5 | 76.0 | -0.069444 | 0.16 | -1.570796 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 75522 | 2020-08-07 22:03:51 | 35.6 | 493 | 17.5508 | 68.5 | 10 | 137825.8 | 73.0 | 0.055556 | 0.06 | -1.274942 |
| 75523 | 2020-08-07 22:03:52 | 36.6 | 492 | 18.0072 | 68.5 | 11 | 137825.8 | 73.0 | 0.277778 | 0.06 | -1.570796 |
| 75524 | 2020-08-07 22:03:53 | -29.2 | 494 | -14.4248 | 68.5 | 9 | 137825.8 | 73.0 | -0.555556 | 0.06 | -1.570796 |
| 75525 | 2020-08-07 22:03:54 | 18.5 | 494 | 9.1390 | 68.5 | 8 | 137825.8 | 73.0 | -0.277778 | 0.06 | -1.570796 |
| 75526 | 2020-08-07 22:03:59 | -21.9 | 494 | -10.8186 | 68.4 | 8 | 137825.8 | 73.0 | 0.000000 | 0.06 | -1.570796 |
65993 rows × 11 columns
# Output table to html for website.
html = august_threeToseven.to_html(max_rows=20)
# Write html to file.
file = open("august_threeToseven_merged_final.html", "w")
file.write(html)
file.close()
# Convert DateTime column into a DatetimeIndex so time-based resampling works.
# (An earlier draft looped a set_index helper over every raw frame; the merged
# frame is the only one that needs it here.)
august_threeToseven_i = august_threeToseven.set_index('DateTime')
august_threeToseven_i
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | |
|---|---|---|---|---|---|---|---|---|---|---|
| DateTime | ||||||||||
| 2020-08-03 08:56:49 | 25.8 | 515 | 13.2870 | 68.7 | 11 | 137084.5 | 75.0 | 0.000000 | 0.16 | -1.570796 |
| 2020-08-03 08:56:50 | -5.2 | 516 | -2.6832 | 68.7 | 9 | 137084.5 | 75.0 | -0.555556 | 0.16 | -1.570796 |
| 2020-08-03 08:56:53 | 1.8 | 515 | 0.9270 | 68.7 | 7 | 137084.5 | 75.0 | -0.185185 | 0.16 | -1.570796 |
| 2020-08-03 08:56:58 | 27.0 | 514 | 13.8780 | 68.7 | 9 | 137084.5 | 76.0 | 0.111111 | 0.16 | -1.570796 |
| 2020-08-03 08:57:02 | 14.4 | 515 | 7.4160 | 68.6 | 8 | 137084.5 | 76.0 | -0.069444 | 0.16 | -1.570796 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-08-07 22:03:51 | 35.6 | 493 | 17.5508 | 68.5 | 10 | 137825.8 | 73.0 | 0.055556 | 0.06 | -1.274942 |
| 2020-08-07 22:03:52 | 36.6 | 492 | 18.0072 | 68.5 | 11 | 137825.8 | 73.0 | 0.277778 | 0.06 | -1.570796 |
| 2020-08-07 22:03:53 | -29.2 | 494 | -14.4248 | 68.5 | 9 | 137825.8 | 73.0 | -0.555556 | 0.06 | -1.570796 |
| 2020-08-07 22:03:54 | 18.5 | 494 | 9.1390 | 68.5 | 8 | 137825.8 | 73.0 | -0.277778 | 0.06 | -1.570796 |
| 2020-08-07 22:03:59 | -21.9 | 494 | -10.8186 | 68.4 | 8 | 137825.8 | 73.0 | 0.000000 | 0.06 | -1.570796 |
65993 rows × 10 columns
august_threeToseven_i.isnull().sum()
Current 0 Voltage 0 Power 0 Elevation 0 Speed 0 Odometer 0 Temperature 0 Acceleration 0 Grade 0 Angle 0 dtype: int64
def _resample_mean(frame, rule):
    """Downsample `frame` to `rule` buckets (right-closed) by mean.

    Adds a 'DayofWeek' column derived from each bucket's timestamp.
    Requires `frame` to have a DatetimeIndex.
    """
    out = frame.resample(rule, closed='right').mean()
    out['DayofWeek'] = out.index.day_name()
    return out

# Downsample the 8/3-8/7 slice at several granularities:
# 30 seconds, 1 minute, 1 hour, 1 day.
august_threeToseven_i_by30Sec = _resample_mean(august_threeToseven_i, '30S')
august_threeToseven_i_byMin = _resample_mean(august_threeToseven_i, 'T')
august_threeToseven_i_byHour = _resample_mean(august_threeToseven_i, 'H')
august_threeToseven_i_byDay = _resample_mean(august_threeToseven_i, 'D')
august_threeToseven_i_by30Sec
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | DayofWeek | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| DateTime | |||||||||||
| 2020-08-03 08:56:30 | 12.350000 | 515.000000 | 6.352200 | 68.700000 | 9.000000 | 137084.500000 | 75.25 | -0.157407 | 0.16 | -1.570796 | Monday |
| 2020-08-03 08:57:00 | 8.054545 | 514.909091 | 4.138745 | 68.527273 | 6.909091 | 137084.500000 | 76.00 | -0.117705 | 0.16 | -1.570796 | Monday |
| 2020-08-03 08:57:30 | 4.075000 | 515.166667 | 2.080933 | 68.325000 | 7.583333 | 137084.550000 | 76.00 | -0.150463 | 0.16 | 0.130900 | Monday |
| 2020-08-03 08:58:00 | 15.660000 | 514.600000 | 8.058220 | 68.400000 | 3.200000 | 137084.600000 | 76.00 | 0.048789 | 0.16 | 1.570796 | Monday |
| 2020-08-03 08:58:30 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Monday |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-08-07 22:01:30 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Friday |
| 2020-08-07 22:02:00 | 0.691667 | 493.666667 | 0.330458 | 68.125000 | 8.000000 | 137825.516667 | 73.00 | -0.159199 | 0.06 | 0.563507 | Friday |
| 2020-08-07 22:02:30 | 32.337500 | 493.000000 | 15.928612 | 68.250000 | 6.125000 | 137825.600000 | 73.00 | 0.217682 | 0.06 | 0.392699 | Friday |
| 2020-08-07 22:03:00 | 3.833333 | 493.416667 | 1.884067 | 68.541667 | 9.250000 | 137825.691667 | 73.00 | -0.078318 | 0.06 | 1.178097 | Friday |
| 2020-08-07 22:03:30 | 9.716667 | 493.333333 | 4.778783 | 68.500000 | 9.166667 | 137825.783333 | 73.00 | -0.083333 | 0.06 | -1.521487 | Friday |
13095 rows × 11 columns
august_threeToseven_i_by30Sec.isnull().sum()
Current 8407 Voltage 8407 Power 8407 Elevation 8407 Speed 8407 Odometer 8407 Temperature 8407 Acceleration 8407 Grade 8407 Angle 8407 DayofWeek 0 dtype: int64
august_threeToseven_i_byMin
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | DayofWeek | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| DateTime | |||||||||||
| 2020-08-03 08:56:00 | 12.350000 | 515.000000 | 6.352200 | 68.700000 | 9.000000 | 137084.500000 | 75.25 | -0.157407 | 0.160000 | -1.570796 | Monday |
| 2020-08-03 08:57:00 | 5.978261 | 515.043478 | 3.065104 | 68.421739 | 7.260870 | 137084.526087 | 76.00 | -0.134796 | 0.160000 | -0.682955 | Monday |
| 2020-08-03 08:58:00 | 15.660000 | 514.600000 | 8.058220 | 68.400000 | 3.200000 | 137084.600000 | 76.00 | 0.048789 | 0.160000 | 1.570796 | Monday |
| 2020-08-03 08:59:00 | 96.128000 | 512.160000 | 48.443020 | 68.744000 | 25.840000 | 137084.688000 | 76.00 | 0.271667 | 0.160000 | 1.319469 | Monday |
| 2020-08-03 09:00:00 | 25.196552 | 513.965517 | 12.604510 | 69.582759 | 26.896552 | 137085.031034 | 76.00 | -0.146643 | 0.063448 | 0.162496 | Monday |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-08-07 21:59:00 | 47.859574 | 491.127660 | 22.554400 | 69.468085 | 36.617021 | 137825.065957 | 73.00 | -0.030864 | 0.060000 | -0.401054 | Friday |
| 2020-08-07 22:00:00 | 13.396429 | 493.000000 | 6.592754 | 68.617857 | 8.750000 | 137825.339286 | 73.00 | -0.050312 | 0.060000 | -1.402497 | Friday |
| 2020-08-07 22:01:00 | 22.666667 | 493.166667 | 11.175433 | 68.283333 | 6.333333 | 137825.433333 | 73.00 | 0.021605 | 0.060000 | -0.261799 | Friday |
| 2020-08-07 22:02:00 | 13.350000 | 493.400000 | 6.569720 | 68.175000 | 7.250000 | 137825.550000 | 73.00 | -0.008447 | 0.060000 | 0.495184 | Friday |
| 2020-08-07 22:03:00 | 5.794444 | 493.388889 | 2.848972 | 68.527778 | 9.222222 | 137825.722222 | 73.00 | -0.079990 | 0.060000 | 0.278236 | Friday |
6548 rows × 11 columns
august_threeToseven_i_byMin.isnull().sum()
Current 3982 Voltage 3982 Power 3982 Elevation 3982 Speed 3982 Odometer 3982 Temperature 3982 Acceleration 3982 Grade 3982 Angle 3982 DayofWeek 0 dtype: int64
august_threeToseven_i_byHour
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | DayofWeek | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| DateTime | |||||||||||
| 2020-08-03 08:00:00 | 46.814035 | 513.736842 | 23.636365 | 68.580702 | 15.175439 | 137084.601754 | 75.947368 | 0.057994 | 0.160000 | 0.330694 | Monday |
| 2020-08-03 09:00:00 | 38.521206 | 513.417879 | 18.948626 | 63.489744 | 29.519058 | 137095.878032 | 79.410949 | -0.066516 | -0.050159 | 0.066516 | Monday |
| 2020-08-03 10:00:00 | 33.854192 | 512.556138 | 16.615241 | 63.251722 | 25.998503 | 137110.539746 | 81.327844 | -0.083076 | -0.076063 | -0.066298 | Monday |
| 2020-08-03 11:00:00 | 37.017060 | 509.346535 | 18.024716 | 62.597563 | 28.464585 | 137124.246154 | 80.350343 | -0.088393 | 0.051318 | 0.324286 | Monday |
| 2020-08-03 12:00:00 | 35.304762 | 508.163383 | 17.112039 | 63.032594 | 28.745484 | 137137.967898 | 79.414614 | -0.099657 | -0.089918 | -0.091701 | Monday |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-08-07 18:00:00 | 33.838226 | 504.390547 | 16.743944 | 35.152570 | 21.752073 | 137789.472803 | 71.455224 | -0.046784 | 0.060000 | -0.117778 | Friday |
| 2020-08-07 19:00:00 | 31.125966 | 501.684636 | 15.258164 | 38.099371 | 22.663073 | 137800.630368 | 69.911950 | -0.058058 | 0.060000 | -0.364729 | Friday |
| 2020-08-07 20:00:00 | 43.628416 | 497.386425 | 21.025600 | 37.448507 | 25.476923 | 137811.416923 | 68.560181 | -0.053425 | 0.060000 | 0.108083 | Friday |
| 2020-08-07 21:00:00 | 52.651971 | 492.988053 | 25.173058 | 62.035245 | 29.617682 | 137821.346595 | 70.952210 | -0.028818 | 0.060000 | 0.587233 | Friday |
| 2020-08-07 22:00:00 | 12.255556 | 493.222222 | 6.032300 | 68.444444 | 8.250000 | 137825.501389 | 73.000000 | -0.040109 | 0.060000 | -0.360122 | Friday |
111 rows × 11 columns
august_threeToseven_i_byHour.isnull().sum()
Current 53 Voltage 53 Power 53 Elevation 53 Speed 53 Odometer 53 Temperature 53 Acceleration 53 Grade 53 Angle 53 DayofWeek 0 dtype: int64
august_threeToseven_i_byDay
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | DayofWeek | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| DateTime | |||||||||||
| 2020-08-03 | 36.403665 | 506.773463 | 17.722937 | 63.109167 | 26.890739 | 137154.762669 | 78.743891 | -0.094937 | -0.012526 | 0.106010 | Monday |
| 2020-08-04 | 35.905036 | 508.730726 | 17.729420 | 63.016624 | 25.152434 | 137295.707015 | 71.680287 | -0.059047 | -0.008412 | 0.115683 | Tuesday |
| 2020-08-05 | 21.518136 | 508.353467 | 10.559564 | 100.256502 | 23.075263 | 137437.861752 | 72.063167 | -0.097731 | 0.055385 | 0.019556 | Wednesday |
| 2020-08-06 | 32.660748 | 507.858118 | 16.112729 | 45.054050 | 27.341155 | 137597.170926 | 74.825282 | -0.056334 | 0.060000 | 0.046765 | Thursday |
| 2020-08-07 | 38.551349 | 506.292697 | 18.997665 | 38.895274 | 25.559053 | 137755.638334 | 73.990915 | -0.029770 | 0.060000 | -0.158325 | Friday |
# Drop the NaNs from downsampled dataset.
# Resample buckets with no samples come out all-NaN (except DayofWeek, which
# is derived from the index); remove those rows. The daily table showed no
# gaps above, so it is left unfiltered.
august_threeToseven_i_by30Sec_d = august_threeToseven_i_by30Sec.dropna()
august_threeToseven_i_byMin_d = august_threeToseven_i_byMin.dropna()
august_threeToseven_i_byHour_d = august_threeToseven_i_byHour.dropna()
august_threeToseven_i_by30Sec_d
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | DayofWeek | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| DateTime | |||||||||||
| 2020-08-03 08:56:30 | 12.350000 | 515.000000 | 6.352200 | 68.700000 | 9.000000 | 137084.500000 | 75.25 | -0.157407 | 0.16 | -1.570796 | Monday |
| 2020-08-03 08:57:00 | 8.054545 | 514.909091 | 4.138745 | 68.527273 | 6.909091 | 137084.500000 | 76.00 | -0.117705 | 0.16 | -1.570796 | Monday |
| 2020-08-03 08:57:30 | 4.075000 | 515.166667 | 2.080933 | 68.325000 | 7.583333 | 137084.550000 | 76.00 | -0.150463 | 0.16 | 0.130900 | Monday |
| 2020-08-03 08:58:00 | 15.660000 | 514.600000 | 8.058220 | 68.400000 | 3.200000 | 137084.600000 | 76.00 | 0.048789 | 0.16 | 1.570796 | Monday |
| 2020-08-03 08:59:00 | 21.550000 | 514.500000 | 10.961250 | 68.400000 | 14.000000 | 137084.600000 | 76.00 | -0.250000 | 0.16 | 1.570796 | Monday |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-08-07 22:01:00 | 22.666667 | 493.166667 | 11.175433 | 68.283333 | 6.333333 | 137825.433333 | 73.00 | 0.021605 | 0.06 | -0.261799 | Friday |
| 2020-08-07 22:02:00 | 0.691667 | 493.666667 | 0.330458 | 68.125000 | 8.000000 | 137825.516667 | 73.00 | -0.159199 | 0.06 | 0.563507 | Friday |
| 2020-08-07 22:02:30 | 32.337500 | 493.000000 | 15.928612 | 68.250000 | 6.125000 | 137825.600000 | 73.00 | 0.217682 | 0.06 | 0.392699 | Friday |
| 2020-08-07 22:03:00 | 3.833333 | 493.416667 | 1.884067 | 68.541667 | 9.250000 | 137825.691667 | 73.00 | -0.078318 | 0.06 | 1.178097 | Friday |
| 2020-08-07 22:03:30 | 9.716667 | 493.333333 | 4.778783 | 68.500000 | 9.166667 | 137825.783333 | 73.00 | -0.083333 | 0.06 | -1.521487 | Friday |
4688 rows × 11 columns
august_threeToseven_i_by30Sec_d.isnull().sum()
Current 0 Voltage 0 Power 0 Elevation 0 Speed 0 Odometer 0 Temperature 0 Acceleration 0 Grade 0 Angle 0 DayofWeek 0 dtype: int64
august_threeToseven_i_byMin_d
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | DayofWeek | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| DateTime | |||||||||||
| 2020-08-03 08:56:00 | 12.350000 | 515.000000 | 6.352200 | 68.700000 | 9.000000 | 137084.500000 | 75.25 | -0.157407 | 0.160000 | -1.570796 | Monday |
| 2020-08-03 08:57:00 | 5.978261 | 515.043478 | 3.065104 | 68.421739 | 7.260870 | 137084.526087 | 76.00 | -0.134796 | 0.160000 | -0.682955 | Monday |
| 2020-08-03 08:58:00 | 15.660000 | 514.600000 | 8.058220 | 68.400000 | 3.200000 | 137084.600000 | 76.00 | 0.048789 | 0.160000 | 1.570796 | Monday |
| 2020-08-03 08:59:00 | 96.128000 | 512.160000 | 48.443020 | 68.744000 | 25.840000 | 137084.688000 | 76.00 | 0.271667 | 0.160000 | 1.319469 | Monday |
| 2020-08-03 09:00:00 | 25.196552 | 513.965517 | 12.604510 | 69.582759 | 26.896552 | 137085.031034 | 76.00 | -0.146643 | 0.063448 | 0.162496 | Monday |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-08-07 21:59:00 | 47.859574 | 491.127660 | 22.554400 | 69.468085 | 36.617021 | 137825.065957 | 73.00 | -0.030864 | 0.060000 | -0.401054 | Friday |
| 2020-08-07 22:00:00 | 13.396429 | 493.000000 | 6.592754 | 68.617857 | 8.750000 | 137825.339286 | 73.00 | -0.050312 | 0.060000 | -1.402497 | Friday |
| 2020-08-07 22:01:00 | 22.666667 | 493.166667 | 11.175433 | 68.283333 | 6.333333 | 137825.433333 | 73.00 | 0.021605 | 0.060000 | -0.261799 | Friday |
| 2020-08-07 22:02:00 | 13.350000 | 493.400000 | 6.569720 | 68.175000 | 7.250000 | 137825.550000 | 73.00 | -0.008447 | 0.060000 | 0.495184 | Friday |
| 2020-08-07 22:03:00 | 5.794444 | 493.388889 | 2.848972 | 68.527778 | 9.222222 | 137825.722222 | 73.00 | -0.079990 | 0.060000 | 0.278236 | Friday |
2566 rows × 11 columns
august_threeToseven_i_byMin_d.isnull().sum()
Current 0 Voltage 0 Power 0 Elevation 0 Speed 0 Odometer 0 Temperature 0 Acceleration 0 Grade 0 Angle 0 DayofWeek 0 dtype: int64
august_threeToseven_i_byHour_d
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | DayofWeek | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| DateTime | |||||||||||
| 2020-08-03 08:00:00 | 46.814035 | 513.736842 | 23.636365 | 68.580702 | 15.175439 | 137084.601754 | 75.947368 | 0.057994 | 0.160000 | 0.330694 | Monday |
| 2020-08-03 09:00:00 | 38.521206 | 513.417879 | 18.948626 | 63.489744 | 29.519058 | 137095.878032 | 79.410949 | -0.066516 | -0.050159 | 0.066516 | Monday |
| 2020-08-03 10:00:00 | 33.854192 | 512.556138 | 16.615241 | 63.251722 | 25.998503 | 137110.539746 | 81.327844 | -0.083076 | -0.076063 | -0.066298 | Monday |
| 2020-08-03 11:00:00 | 37.017060 | 509.346535 | 18.024716 | 62.597563 | 28.464585 | 137124.246154 | 80.350343 | -0.088393 | 0.051318 | 0.324286 | Monday |
| 2020-08-03 12:00:00 | 35.304762 | 508.163383 | 17.112039 | 63.032594 | 28.745484 | 137137.967898 | 79.414614 | -0.099657 | -0.089918 | -0.091701 | Monday |
| 2020-08-03 13:00:00 | 41.483814 | 507.389866 | 20.223666 | 62.560873 | 27.647431 | 137151.611541 | 79.000000 | -0.110108 | 0.073498 | 0.320698 | Monday |
| 2020-08-03 14:00:00 | 36.016556 | 507.019956 | 17.484994 | 63.096009 | 26.174427 | 137165.116704 | 79.000000 | -0.110436 | -0.088352 | -0.189064 | Monday |
| 2020-08-03 15:00:00 | 33.526890 | 505.594151 | 16.305594 | 63.116334 | 25.773181 | 137179.383524 | 78.312411 | -0.131114 | 0.008745 | 0.229223 | Monday |
| 2020-08-03 16:00:00 | 31.784441 | 503.152972 | 15.395052 | 62.983916 | 25.363636 | 137192.911976 | 78.000000 | -0.119236 | -0.086941 | -0.098267 | Monday |
| 2020-08-03 17:00:00 | 40.957279 | 499.641176 | 19.877888 | 62.424338 | 24.427941 | 137206.601691 | 76.535294 | -0.080850 | 0.098735 | 0.423423 | Monday |
| 2020-08-03 18:00:00 | 32.318680 | 496.193026 | 15.493625 | 65.070610 | 27.262765 | 137217.505106 | 74.469489 | -0.053260 | 0.017534 | 0.046266 | Monday |
| 2020-08-04 09:00:00 | 14.171096 | 514.841639 | 7.009667 | 64.074419 | 24.527132 | 137230.335770 | 72.784053 | -0.067984 | -0.083832 | -0.214788 | Tuesday |
| 2020-08-04 10:00:00 | 24.789373 | 514.191638 | 12.268808 | 62.650348 | 24.995645 | 137244.011672 | 73.208188 | -0.095027 | 0.093641 | 0.443237 | Tuesday |
| 2020-08-04 11:00:00 | 15.863989 | 513.451898 | 7.769582 | 63.091880 | 24.345102 | 137257.665137 | 73.742277 | -0.093983 | -0.063177 | -0.107962 | Tuesday |
| 2020-08-04 12:00:00 | 19.421619 | 511.042105 | 9.546865 | 62.361134 | 22.965992 | 137271.361862 | 73.000000 | -0.111373 | 0.057425 | 0.386548 | Tuesday |
| 2020-08-04 13:00:00 | 21.392641 | 509.394930 | 10.638601 | 62.997874 | 21.009812 | 137284.709975 | 72.471791 | -0.053877 | -0.101096 | -0.187737 | Tuesday |
| 2020-08-04 14:00:00 | 31.803942 | 508.236515 | 15.829201 | 61.778320 | 23.629668 | 137296.935477 | 72.000000 | -0.055656 | -0.028610 | 0.187254 | Tuesday |
| 2020-08-04 15:00:00 | 55.059967 | 506.877968 | 26.996885 | 63.616731 | 28.764771 | 137310.302650 | 71.272225 | -0.091209 | -0.005864 | 0.072158 | Tuesday |
| 2020-08-04 16:00:00 | 57.252479 | 506.455923 | 28.312977 | 62.233058 | 27.650826 | 137325.361364 | 70.540634 | -0.042165 | 0.040083 | 0.313111 | Tuesday |
| 2020-08-04 17:00:00 | 45.226893 | 505.129057 | 22.444718 | 63.104019 | 25.133694 | 137339.027125 | 70.000000 | 0.008309 | -0.090634 | -0.123533 | Tuesday |
| 2020-08-04 18:00:00 | 50.099342 | 502.141185 | 24.744969 | 63.999854 | 25.700073 | 137352.958157 | 69.174835 | 0.007206 | 0.061726 | 0.294938 | Tuesday |
| 2020-08-05 10:00:00 | 21.041837 | 514.171429 | 10.462989 | 81.632449 | 26.853061 | 137364.051837 | 71.722449 | -0.140669 | 0.302408 | 0.816967 | Wednesday |
| 2020-08-05 11:00:00 | 8.896233 | 514.537671 | 4.268589 | 101.479538 | 22.594178 | 137375.806421 | 76.135274 | -0.136814 | 0.025805 | 0.068159 | Wednesday |
| 2020-08-05 12:00:00 | 13.946825 | 513.800705 | 6.873253 | 100.665432 | 22.413580 | 137389.855556 | 76.704586 | -0.096275 | 0.156138 | 0.081922 | Wednesday |
| 2020-08-05 13:00:00 | 7.007674 | 511.950388 | 3.315924 | 101.613256 | 21.929457 | 137404.091473 | 76.000000 | -0.115491 | -0.130884 | 0.048790 | Wednesday |
| 2020-08-05 14:00:00 | 10.501439 | 509.821043 | 5.150133 | 101.641367 | 21.139388 | 137418.857914 | 74.933453 | -0.108972 | 0.060000 | 0.005361 | Wednesday |
| 2020-08-05 15:00:00 | 21.275688 | 508.595594 | 10.625085 | 102.677419 | 20.964595 | 137433.566562 | 73.378442 | -0.062049 | 0.060000 | -0.094987 | Wednesday |
| 2020-08-05 16:00:00 | 34.078746 | 507.549712 | 16.895449 | 101.891210 | 23.551873 | 137449.214265 | 71.508646 | -0.053517 | 0.060000 | -0.060979 | Wednesday |
| 2020-08-05 17:00:00 | 34.723362 | 506.861718 | 17.061153 | 101.540102 | 24.740175 | 137464.035298 | 69.611354 | -0.098567 | 0.060000 | 0.078358 | Wednesday |
| 2020-08-05 18:00:00 | 33.505707 | 504.967846 | 16.461211 | 101.060772 | 24.642283 | 137478.677090 | 68.100482 | -0.087230 | 0.060000 | 0.090408 | Wednesday |
| 2020-08-05 19:00:00 | 29.734047 | 501.905782 | 14.486554 | 101.741899 | 24.439686 | 137493.666167 | 66.586010 | -0.101113 | 0.060000 | 0.004044 | Wednesday |
| 2020-08-05 20:00:00 | 12.300257 | 500.090090 | 5.773215 | 92.215573 | 21.742600 | 137505.513771 | 68.133848 | -0.115046 | 0.060000 | -0.533841 | Wednesday |
| 2020-08-06 09:00:00 | 34.506259 | 514.046200 | 17.142867 | 52.558122 | 36.284650 | 137516.507601 | 70.980626 | -0.024839 | 0.060000 | -0.793634 | Thursday |
| 2020-08-06 10:00:00 | 18.822663 | 514.557319 | 9.362231 | 43.937478 | 25.476190 | 137530.517813 | 74.338624 | -0.059337 | 0.060000 | -0.097963 | Thursday |
| 2020-08-06 11:00:00 | 35.751583 | 512.692806 | 17.912338 | 43.350000 | 26.176978 | 137546.448777 | 78.403597 | -0.026951 | 0.060000 | 0.155910 | Thursday |
| 2020-08-06 12:00:00 | 25.443069 | 510.267913 | 12.667931 | 45.059190 | 24.342679 | 137562.534657 | 78.693925 | -0.070830 | 0.060000 | 0.229588 | Thursday |
| 2020-08-06 13:00:00 | 32.365566 | 508.588147 | 15.969248 | 42.574569 | 28.544636 | 137579.300900 | 76.720930 | -0.050146 | 0.060000 | -0.105393 | Thursday |
| 2020-08-06 14:00:00 | 34.924432 | 507.964286 | 17.268690 | 43.965260 | 27.553571 | 137593.977679 | 75.594156 | -0.050363 | 0.060000 | 0.075718 | Thursday |
| 2020-08-06 15:00:00 | 39.198819 | 507.532639 | 19.503587 | 44.341181 | 26.297917 | 137608.423333 | 74.938194 | -0.020428 | 0.060000 | 0.193928 | Thursday |
| 2020-08-06 16:00:00 | 38.261431 | 506.784112 | 18.896410 | 44.062159 | 26.335355 | 137624.931898 | 74.000000 | -0.046596 | 0.060000 | 0.107190 | Thursday |
| 2020-08-06 17:00:00 | 36.236802 | 504.777353 | 17.635703 | 43.813160 | 28.289212 | 137641.150115 | 73.086458 | -0.104021 | 0.060000 | 0.056433 | Thursday |
| 2020-08-06 18:00:00 | 28.410116 | 502.413900 | 13.651057 | 44.797452 | 27.369112 | 137656.227413 | 71.684170 | -0.091439 | 0.060000 | -0.044515 | Thursday |
| 2020-08-06 19:00:00 | 31.893981 | 498.779612 | 15.507713 | 52.681068 | 28.896117 | 137671.443786 | 72.160194 | -0.071418 | 0.060000 | 0.340689 | Thursday |
| 2020-08-07 08:00:00 | 0.000000 | 519.000000 | 0.000000 | 68.600000 | 0.000000 | 137678.000000 | 73.000000 | -0.000050 | 0.060000 | -1.419560 | Friday |
| 2020-08-07 09:00:00 | 38.225905 | 513.518106 | 19.029594 | 67.855153 | 37.983287 | 137681.140111 | 73.272981 | -0.029196 | 0.060000 | -0.095829 | Friday |
| 2020-08-07 10:00:00 | 30.960066 | 513.792904 | 15.443820 | 38.624917 | 27.594059 | 137693.767409 | 74.741749 | -0.050367 | 0.060000 | -0.207682 | Friday |
| 2020-08-07 11:00:00 | 42.065748 | 511.545398 | 20.831491 | 35.079274 | 28.574828 | 137708.894490 | 77.127740 | -0.026501 | 0.060000 | -0.183004 | Friday |
| 2020-08-07 12:00:00 | 37.900451 | 509.049594 | 18.724493 | 36.138593 | 24.832281 | 137722.693598 | 77.511271 | -0.037833 | 0.060000 | -0.310192 | Friday |
| 2020-08-07 13:00:00 | 40.134566 | 508.294798 | 19.951246 | 36.105780 | 25.906358 | 137732.166821 | 77.000000 | -0.019414 | 0.060000 | -0.293179 | Friday |
| 2020-08-07 14:00:00 | 41.384190 | 507.878985 | 20.554421 | 36.246389 | 24.253741 | 137743.984906 | 76.247885 | 0.013043 | 0.060000 | -0.108315 | Friday |
| 2020-08-07 15:00:00 | 41.040677 | 507.409913 | 20.330589 | 35.592054 | 26.049567 | 137758.448623 | 75.296617 | 0.003615 | 0.060000 | -0.362049 | Friday |
| 2020-08-07 16:00:00 | 38.863512 | 506.844815 | 19.273525 | 35.873354 | 22.863740 | 137771.240802 | 73.789553 | -0.032756 | 0.060000 | -0.138330 | Friday |
| 2020-08-07 17:00:00 | 27.768350 | 506.089226 | 13.638872 | 37.598653 | 24.693603 | 137780.021044 | 72.616162 | -0.041067 | 0.060000 | -0.502220 | Friday |
| 2020-08-07 18:00:00 | 33.838226 | 504.390547 | 16.743944 | 35.152570 | 21.752073 | 137789.472803 | 71.455224 | -0.046784 | 0.060000 | -0.117778 | Friday |
| 2020-08-07 19:00:00 | 31.125966 | 501.684636 | 15.258164 | 38.099371 | 22.663073 | 137800.630368 | 69.911950 | -0.058058 | 0.060000 | -0.364729 | Friday |
| 2020-08-07 20:00:00 | 43.628416 | 497.386425 | 21.025600 | 37.448507 | 25.476923 | 137811.416923 | 68.560181 | -0.053425 | 0.060000 | 0.108083 | Friday |
| 2020-08-07 21:00:00 | 52.651971 | 492.988053 | 25.173058 | 62.035245 | 29.617682 | 137821.346595 | 70.952210 | -0.028818 | 0.060000 | 0.587233 | Friday |
| 2020-08-07 22:00:00 | 12.255556 | 493.222222 | 6.032300 | 68.444444 | 8.250000 | 137825.501389 | 73.000000 | -0.040109 | 0.060000 | -0.360122 | Friday |
august_threeToseven_i_byHour_d.isnull().sum()
Current 0 Voltage 0 Power 0 Elevation 0 Speed 0 Odometer 0 Temperature 0 Acceleration 0 Grade 0 Angle 0 DayofWeek 0 dtype: int64
august_threeToseven_i_byDay
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | DayofWeek | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| DateTime | |||||||||||
| 2020-08-03 | 36.403665 | 506.773463 | 17.722937 | 63.109167 | 26.890739 | 137154.762669 | 78.743891 | -0.094937 | -0.012526 | 0.106010 | Monday |
| 2020-08-04 | 35.905036 | 508.730726 | 17.729420 | 63.016624 | 25.152434 | 137295.707015 | 71.680287 | -0.059047 | -0.008412 | 0.115683 | Tuesday |
| 2020-08-05 | 21.518136 | 508.353467 | 10.559564 | 100.256502 | 23.075263 | 137437.861752 | 72.063167 | -0.097731 | 0.055385 | 0.019556 | Wednesday |
| 2020-08-06 | 32.660748 | 507.858118 | 16.112729 | 45.054050 | 27.341155 | 137597.170926 | 74.825282 | -0.056334 | 0.060000 | 0.046765 | Thursday |
| 2020-08-07 | 38.551349 | 506.292697 | 18.997665 | 38.895274 | 25.559053 | 137755.638334 | 73.990915 | -0.029770 | 0.060000 | -0.158325 | Friday |
august_threeToseven_i_byDay.isnull().sum()
Current 0 Voltage 0 Power 0 Elevation 0 Speed 0 Odometer 0 Temperature 0 Acceleration 0 Grade 0 Angle 0 DayofWeek 0 dtype: int64
# To confirm suspicion of gap in data collection from sensor, i.e. the logger didn't collect data for a period of certain time.
# Downsampled data from above code(before dropping NaNs) shows gaps(NaNs) due to gaps in the original dataset.
# In other words, nothing to downsample in the original hence the NaNs in the downsampled dataset.
# The two immediate tables confirm this notion. The NaNs encountered are due to gaps
# (in minutes, for example, in this case) in the original dataset.
# This is the nature of downsampling in general.
# Label-based slice of the minute-downsampled table: all rows from
# 2020-08-03 00:00 through the end of 2020-08-04 (partial-string slicing on a
# DatetimeIndex is inclusive of both endpoint days).
sample = august_threeToseven_i_byMin['2020-08-03':'2020-08-04']
sample
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | DayofWeek | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| DateTime | |||||||||||
| 2020-08-03 08:56:00 | 12.350000 | 515.000000 | 6.352200 | 68.700000 | 9.000000 | 137084.500000 | 75.25 | -0.157407 | 0.160000 | -1.570796 | Monday |
| 2020-08-03 08:57:00 | 5.978261 | 515.043478 | 3.065104 | 68.421739 | 7.260870 | 137084.526087 | 76.00 | -0.134796 | 0.160000 | -0.682955 | Monday |
| 2020-08-03 08:58:00 | 15.660000 | 514.600000 | 8.058220 | 68.400000 | 3.200000 | 137084.600000 | 76.00 | 0.048789 | 0.160000 | 1.570796 | Monday |
| 2020-08-03 08:59:00 | 96.128000 | 512.160000 | 48.443020 | 68.744000 | 25.840000 | 137084.688000 | 76.00 | 0.271667 | 0.160000 | 1.319469 | Monday |
| 2020-08-03 09:00:00 | 25.196552 | 513.965517 | 12.604510 | 69.582759 | 26.896552 | 137085.031034 | 76.00 | -0.146643 | 0.063448 | 0.162496 | Monday |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-08-04 23:55:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Tuesday |
| 2020-08-04 23:56:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Tuesday |
| 2020-08-04 23:57:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Tuesday |
| 2020-08-04 23:58:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Tuesday |
| 2020-08-04 23:59:00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | Tuesday |
2344 rows × 11 columns
sample.isnull().sum()
Current 1356 Voltage 1356 Power 1356 Elevation 1356 Speed 1356 Odometer 1356 Temperature 1356 Acceleration 1356 Grade 1356 Angle 1356 DayofWeek 0 dtype: int64
august_threeToseven_i['2020-08-03 09:08:00':'2020-08-03 09:20:00']
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | |
|---|---|---|---|---|---|---|---|---|---|---|
| DateTime | ||||||||||
| 2020-08-03 09:08:02 | -302.3 | 522 | -157.8006 | 59.8 | 32 | 137089.3 | 77.0 | -1.388889 | -0.50 | -1.570796 |
| 2020-08-03 09:08:03 | -181.0 | 521 | -94.3010 | 59.8 | 27 | 137089.4 | 77.0 | -1.388889 | -0.50 | 0.000000 |
| 2020-08-03 09:08:04 | -247.6 | 522 | -129.2472 | 59.8 | 21 | 137089.4 | 77.0 | -1.666667 | -0.50 | -1.274942 |
| 2020-08-03 09:08:05 | -104.6 | 520 | -54.3920 | 59.8 | 14 | 137089.4 | 77.0 | -1.944444 | -0.50 | -1.274942 |
| 2020-08-03 09:08:06 | 217.6 | 516 | 112.2816 | 59.8 | 16 | 137089.4 | 77.0 | 0.555556 | -0.50 | -1.274942 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-08-03 09:19:53 | 159.4 | 510 | 81.2940 | 62.8 | 37 | 137092.8 | 78.0 | 0.833333 | 0.32 | 1.570796 |
| 2020-08-03 09:19:56 | -208.5 | 515 | -107.3775 | 63.0 | 35 | 137092.8 | 78.0 | -0.185185 | 0.32 | 1.570796 |
| 2020-08-03 09:19:57 | -209.5 | 518 | -108.5210 | 63.0 | 32 | 137092.8 | 78.0 | -0.833333 | 0.32 | 1.274942 |
| 2020-08-03 09:19:59 | -161.7 | 518 | -83.7606 | 63.1 | 23 | 137092.9 | 78.0 | -1.250000 | 0.32 | 1.274942 |
| 2020-08-03 09:20:00 | -144.0 | 519 | -74.7360 | 63.2 | 17 | 137092.9 | 78.0 | -1.666667 | 0.32 | 1.570796 |
177 rows × 10 columns
#from sklearn import preprocessing
#august_threeToseven_i_sckn = august_threeToseven_i.copy()
#august_threeToseven_i_sckn = preprocessing.normalize(august_threeToseven_i_sckn.columns[0:8], norm='l2')
# Normalize. (may not be needed until modeling).
# Copy august_threeToseven first and then assign it to august_threeToseven_i_n.
# This syntax does not work: august_threeToseven_i_n = august_threeToseven. Any changes to august_threeToseven_i_n will affect august_threeToseven.
august_threeToseven_i_n = august_threeToseven_i.copy()
def my_normalize(series):
    """Min-max scale a pandas Series to the [0, 1] range.

    Fix: for a constant column, (max - min) is zero, so the original
    expression divided by zero and produced an all-NaN column that silently
    corrupted downstream statistics (e.g. .corr()). A constant series now
    maps to all zeros instead.
    """
    lo = series.min()
    span = series.max() - lo
    if span == 0:
        # Constant series: define its normalized form as 0 everywhere
        # (preserves index and dtype).
        return series - lo
    return (series - lo) / span
# Min-max scale every feature column of the copy, one column at a time.
for feature in august_threeToseven_i_n.columns:
    august_threeToseven_i_n[feature] = my_normalize(august_threeToseven_i_n[feature])
august_threeToseven_i_n
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | |
|---|---|---|---|---|---|---|---|---|---|---|
| DateTime | ||||||||||
| 2020-08-03 08:56:49 | 0.494582 | 0.800 | 0.496096 | 0.366893 | 0.114583 | 0.0 | 0.5625 | 0.560000 | 0.389397 | 0.000000 |
| 2020-08-03 08:56:50 | 0.461967 | 0.825 | 0.462610 | 0.366893 | 0.093750 | 0.0 | 0.5625 | 0.480000 | 0.389397 | 0.000000 |
| 2020-08-03 08:56:53 | 0.469332 | 0.800 | 0.470180 | 0.366893 | 0.072917 | 0.0 | 0.5625 | 0.533333 | 0.389397 | 0.000000 |
| 2020-08-03 08:56:58 | 0.495844 | 0.775 | 0.497336 | 0.366893 | 0.093750 | 0.0 | 0.6250 | 0.576000 | 0.389397 | 0.000000 |
| 2020-08-03 08:57:02 | 0.482588 | 0.800 | 0.483786 | 0.365924 | 0.083333 | 0.0 | 0.6250 | 0.550000 | 0.389397 | 0.000000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-08-07 22:03:51 | 0.504892 | 0.250 | 0.505037 | 0.364956 | 0.104167 | 1.0 | 0.4375 | 0.568000 | 0.380256 | 0.094173 |
| 2020-08-07 22:03:52 | 0.505944 | 0.225 | 0.505994 | 0.364956 | 0.114583 | 1.0 | 0.4375 | 0.600000 | 0.380256 | 0.000000 |
| 2020-08-07 22:03:53 | 0.436718 | 0.275 | 0.437990 | 0.364956 | 0.093750 | 1.0 | 0.4375 | 0.480000 | 0.380256 | 0.000000 |
| 2020-08-07 22:03:54 | 0.486902 | 0.275 | 0.487399 | 0.364956 | 0.083333 | 1.0 | 0.4375 | 0.520000 | 0.380256 | 0.000000 |
| 2020-08-07 22:03:59 | 0.444398 | 0.275 | 0.445552 | 0.363988 | 0.083333 | 1.0 | 0.4375 | 0.560000 | 0.380256 | 0.000000 |
65993 rows × 10 columns
august_threeToseven_i_r = august_threeToseven_i.resample('2S').interpolate(method='linear', limit_direction='both')
august_threeToseven_i_r
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | |
|---|---|---|---|---|---|---|---|---|---|---|
| DateTime | ||||||||||
| 2020-08-03 08:56:48 | -5.200000 | 516.000000 | -2.683200 | 68.700000 | 9.000000 | 137084.500000 | 75.00 | -0.555556 | 0.16 | -1.570796 |
| 2020-08-03 08:56:50 | -5.200000 | 516.000000 | -2.683200 | 68.700000 | 9.000000 | 137084.500000 | 75.00 | -0.555556 | 0.16 | -1.570796 |
| 2020-08-03 08:56:52 | 2.850000 | 515.500000 | 1.457100 | 68.700000 | 9.000000 | 137084.500000 | 75.25 | -0.388889 | 0.16 | -1.570796 |
| 2020-08-03 08:56:54 | 10.900000 | 515.000000 | 5.597400 | 68.700000 | 9.000000 | 137084.500000 | 75.50 | -0.222222 | 0.16 | -1.570796 |
| 2020-08-03 08:56:56 | 18.950000 | 514.500000 | 9.737700 | 68.700000 | 9.000000 | 137084.500000 | 75.75 | -0.055556 | 0.16 | -1.570796 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2020-08-07 22:03:50 | 30.633333 | 492.333333 | 15.077833 | 68.533333 | 10.333333 | 137825.766667 | 73.00 | 0.185185 | 0.06 | -1.570796 |
| 2020-08-07 22:03:52 | 36.600000 | 492.000000 | 18.007200 | 68.500000 | 11.000000 | 137825.800000 | 73.00 | 0.277778 | 0.06 | -1.570796 |
| 2020-08-07 22:03:54 | 18.500000 | 494.000000 | 9.139000 | 68.500000 | 8.000000 | 137825.800000 | 73.00 | -0.277778 | 0.06 | -1.570796 |
| 2020-08-07 22:03:56 | 18.500000 | 494.000000 | 9.139000 | 68.500000 | 8.000000 | 137825.800000 | 73.00 | -0.277778 | 0.06 | -1.570796 |
| 2020-08-07 22:03:58 | 18.500000 | 494.000000 | 9.139000 | 68.500000 | 8.000000 | 137825.800000 | 73.00 | -0.277778 | 0.06 | -1.570796 |
196416 rows × 10 columns
august_threeToseven_i.describe()
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | |
|---|---|---|---|---|---|---|---|---|---|---|
| count | 65993.000000 | 65993.000000 | 65993.000000 | 65993.000000 | 65993.000000 | 65993.000000 | 65993.000000 | 65993.000000 | 65993.000000 | 65993.000000 |
| mean | 33.137240 | 507.570727 | 16.289559 | 61.235867 | 25.636795 | 137457.348340 | 74.282227 | -0.066585 | 0.032004 | 0.022044 |
| std | 158.717499 | 5.965493 | 80.353412 | 22.791902 | 15.013638 | 218.650528 | 3.588487 | 0.886940 | 0.505543 | 1.387512 |
| min | -444.300000 | 483.000000 | -223.308900 | 30.800000 | 0.000000 | 137084.500000 | 66.000000 | -3.888889 | -4.100000 | -1.570796 |
| 25% | -73.500000 | 504.000000 | -37.485000 | 41.900000 | 13.000000 | 137269.600000 | 72.000000 | -0.833333 | 0.060000 | -1.570796 |
| 50% | 31.300000 | 508.000000 | 15.931700 | 60.000000 | 25.000000 | 137454.500000 | 74.000000 | 0.000000 | 0.060000 | 0.000000 |
| 75% | 136.000000 | 512.000000 | 68.690700 | 68.300000 | 37.000000 | 137648.500000 | 77.000000 | 0.555556 | 0.060000 | 1.570796 |
| max | 506.200000 | 523.000000 | 253.606200 | 134.100000 | 96.000000 | 137825.800000 | 82.000000 | 3.055556 | 6.840000 | 1.570796 |
# Output the summary-statistics table to HTML for the website.
html = august_threeToseven_i.describe().to_html()
# Write html to file. Fixes: the original shadowed the builtin `file` and
# leaked the handle if the write raised; a context manager closes it always.
with open("august_threeToseven_i_stats_table.html", "w") as stats_file:
    stats_file.write(html)
august_threeToseven_i_r.describe()
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | |
|---|---|---|---|---|---|---|---|---|---|---|
| count | 196416.000000 | 196416.000000 | 196416.000000 | 196416.000000 | 196416.000000 | 196416.000000 | 196416.000000 | 196416.000000 | 196416.000000 | 196416.000000 |
| mean | 25.226245 | 508.303677 | 12.636605 | 64.414670 | 11.141773 | 137450.198183 | 72.932875 | -0.073121 | 0.021751 | -0.266317 |
| std | 79.276076 | 5.748757 | 40.093409 | 16.160651 | 12.568732 | 196.852471 | 3.032447 | 0.490949 | 0.321331 | 1.114017 |
| min | -444.300000 | 483.000000 | -222.744600 | 30.800000 | 0.000000 | 137084.500000 | 66.000000 | -3.888889 | -4.100000 | -1.570796 |
| 25% | 4.392959 | 504.140597 | 2.186202 | 59.100000 | 3.215576 | 137267.700000 | 70.000000 | -0.277778 | -0.020000 | -1.274942 |
| 50% | 13.932202 | 508.675881 | 7.047704 | 68.500000 | 7.476492 | 137432.800000 | 73.000000 | -0.061799 | 0.060000 | -0.486594 |
| 75% | 29.081745 | 512.797275 | 14.873968 | 68.648205 | 11.461538 | 137661.000000 | 74.000000 | 0.274593 | 0.060000 | 0.579994 |
| max | 493.000000 | 523.000000 | 247.486000 | 134.100000 | 96.000000 | 137825.800000 | 82.000000 | 3.055556 | 6.840000 | 1.570796 |
august_threeToseven_i.corr()
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | |
|---|---|---|---|---|---|---|---|---|---|---|
| Current | 1.000000 | -0.559703 | 0.999913 | -0.029258 | 0.065301 | 0.007862 | -0.006243 | 0.820136 | 0.031464 | 0.071819 |
| Voltage | -0.559703 | 1.000000 | -0.557591 | 0.036582 | -0.191712 | -0.191624 | 0.344956 | -0.489877 | -0.052789 | -0.094178 |
| Power | 0.999913 | -0.557591 | 1.000000 | -0.028827 | 0.062192 | 0.008133 | -0.006020 | 0.820565 | 0.031612 | 0.071668 |
| Elevation | -0.029258 | 0.036582 | -0.028827 | 1.000000 | -0.080968 | -0.440260 | -0.224043 | -0.019449 | -0.006389 | 0.017761 |
| Speed | 0.065301 | -0.191712 | 0.062192 | -0.080968 | 1.000000 | -0.005225 | 0.027109 | 0.147858 | 0.012867 | -0.004163 |
| Odometer | 0.007862 | -0.191624 | 0.008133 | -0.440260 | -0.005225 | 1.000000 | -0.326741 | 0.021961 | 0.060003 | -0.056327 |
| Temperature | -0.006243 | 0.344956 | -0.006020 | -0.224043 | 0.027109 | -0.326741 | 1.000000 | -0.006090 | -0.024338 | 0.006279 |
| Acceleration | 0.820136 | -0.489877 | 0.820565 | -0.019449 | 0.147858 | 0.021961 | -0.006090 | 1.000000 | -0.003258 | 0.008684 |
| Grade | 0.031464 | -0.052789 | 0.031612 | -0.006389 | 0.012867 | 0.060003 | -0.024338 | -0.003258 | 1.000000 | 0.245784 |
| Angle | 0.071819 | -0.094178 | 0.071668 | 0.017761 | -0.004163 | -0.056327 | 0.006279 | 0.008684 | 0.245784 | 1.000000 |
# Output the correlation-matrix table to HTML for the website.
html = august_threeToseven_i.corr().to_html()
# Write html to file. Fixes: the original shadowed the builtin `file` and
# leaked the handle if the write raised; a context manager closes it always.
with open("august_threeToseven_i_corr.html", "w") as corr_file:
    corr_file.write(html)
august_threeToseven_i_n.corr()
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | Acceleration | Grade | Angle | |
|---|---|---|---|---|---|---|---|---|---|---|
| Current | 1.000000 | -0.559703 | 0.999913 | -0.029258 | 0.065301 | 0.007862 | -0.006243 | 0.820136 | 0.031464 | 0.071819 |
| Voltage | -0.559703 | 1.000000 | -0.557591 | 0.036582 | -0.191712 | -0.191624 | 0.344956 | -0.489877 | -0.052789 | -0.094178 |
| Power | 0.999913 | -0.557591 | 1.000000 | -0.028827 | 0.062192 | 0.008133 | -0.006020 | 0.820565 | 0.031612 | 0.071668 |
| Elevation | -0.029258 | 0.036582 | -0.028827 | 1.000000 | -0.080968 | -0.440260 | -0.224043 | -0.019449 | -0.006389 | 0.017761 |
| Speed | 0.065301 | -0.191712 | 0.062192 | -0.080968 | 1.000000 | -0.005225 | 0.027109 | 0.147858 | 0.012867 | -0.004163 |
| Odometer | 0.007862 | -0.191624 | 0.008133 | -0.440260 | -0.005225 | 1.000000 | -0.326741 | 0.021961 | 0.060003 | -0.056327 |
| Temperature | -0.006243 | 0.344956 | -0.006020 | -0.224043 | 0.027109 | -0.326741 | 1.000000 | -0.006090 | -0.024338 | 0.006279 |
| Acceleration | 0.820136 | -0.489877 | 0.820565 | -0.019449 | 0.147858 | 0.021961 | -0.006090 | 1.000000 | -0.003258 | 0.008684 |
| Grade | 0.031464 | -0.052789 | 0.031612 | -0.006389 | 0.012867 | 0.060003 | -0.024338 | -0.003258 | 1.000000 | 0.245784 |
| Angle | 0.071819 | -0.094178 | 0.071668 | 0.017761 | -0.004163 | -0.056327 | 0.006279 | 0.008684 | 0.245784 | 1.000000 |
!pip install pandas_bokeh
!pip install mlxtend
import pandas_bokeh
from bokeh.embed import components
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, output_file, output_notebook, show
#from bokeh.io import output_notebook, show
from bokeh.models import LinearAxis, Range1d
from bokeh.layouts import gridplot
pandas_bokeh.output_notebook()
Requirement already satisfied: pandas_bokeh in /usr/local/lib/python3.7/dist-packages (0.5.5) Requirement already satisfied: pandas>=0.22.0 in /usr/local/lib/python3.7/dist-packages (from pandas_bokeh) (1.3.5) Requirement already satisfied: bokeh>=2.0 in /usr/local/lib/python3.7/dist-packages (from pandas_bokeh) (2.3.3) Requirement already satisfied: typing-extensions>=3.7.4 in /usr/local/lib/python3.7/dist-packages (from bokeh>=2.0->pandas_bokeh) (3.10.0.2) Requirement already satisfied: pillow>=7.1.0 in /usr/local/lib/python3.7/dist-packages (from bokeh>=2.0->pandas_bokeh) (7.1.2) Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from bokeh>=2.0->pandas_bokeh) (2.8.2) Requirement already satisfied: numpy>=1.11.3 in /usr/local/lib/python3.7/dist-packages (from bokeh>=2.0->pandas_bokeh) (1.19.5) Requirement already satisfied: Jinja2>=2.9 in /usr/local/lib/python3.7/dist-packages (from bokeh>=2.0->pandas_bokeh) (2.11.3) Requirement already satisfied: PyYAML>=3.10 in /usr/local/lib/python3.7/dist-packages (from bokeh>=2.0->pandas_bokeh) (3.13) Requirement already satisfied: tornado>=5.1 in /usr/local/lib/python3.7/dist-packages (from bokeh>=2.0->pandas_bokeh) (5.1.1) Requirement already satisfied: packaging>=16.8 in /usr/local/lib/python3.7/dist-packages (from bokeh>=2.0->pandas_bokeh) (21.3) Requirement already satisfied: MarkupSafe>=0.23 in /usr/local/lib/python3.7/dist-packages (from Jinja2>=2.9->bokeh>=2.0->pandas_bokeh) (2.0.1) Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=16.8->bokeh>=2.0->pandas_bokeh) (3.0.7) Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.22.0->pandas_bokeh) (2018.9) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->bokeh>=2.0->pandas_bokeh) (1.15.0) Requirement already satisfied: mlxtend in 
/usr/local/lib/python3.7/dist-packages (0.14.0) Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from mlxtend) (57.4.0) Requirement already satisfied: pandas>=0.17.1 in /usr/local/lib/python3.7/dist-packages (from mlxtend) (1.3.5) Requirement already satisfied: numpy>=1.10.4 in /usr/local/lib/python3.7/dist-packages (from mlxtend) (1.19.5) Requirement already satisfied: scipy>=0.17 in /usr/local/lib/python3.7/dist-packages (from mlxtend) (1.4.1) Requirement already satisfied: scikit-learn>=0.18 in /usr/local/lib/python3.7/dist-packages (from mlxtend) (1.0.2) Requirement already satisfied: matplotlib>=1.5.1 in /usr/local/lib/python3.7/dist-packages (from mlxtend) (3.2.2) Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=1.5.1->mlxtend) (0.11.0) Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=1.5.1->mlxtend) (1.3.2) Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=1.5.1->mlxtend) (3.0.7) Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=1.5.1->mlxtend) (2.8.2) Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=0.17.1->mlxtend) (2018.9) Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib>=1.5.1->mlxtend) (1.15.0) Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.18->mlxtend) (3.1.0) Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.18->mlxtend) (1.1.0)
# Datasets we can work with to graph, etc...
'''
august_threeToseven # Original merged set.
august_threeToseven_i # Datetime set as index. (this is the base-line table)
august_threeToseven_i_by30Sec # Downsampled to 30 seconds.
august_threeToseven_i_by30Sec_d # NaNs dropped.
august_threeToseven_i_byMin # Downsampled to minutes.
august_threeToseven_i_byMin_d # NaNs dropped.
august_threeToseven_i_byHour # Downsampled to hours.
august_threeToseven_i_byHour_d # NaNs dropped.
august_threeToseven_i_byDay # Downsampled to days.
august_threeToseven_i_n # Normalized.
august_threeToseven_i_r # Upsampled to 2 second intervals with linear interpolation.
current_threeToseven # Original non-merged from 8/3-8/7
voltage_threeToseven # Original non-merged from 8/3-8/7
power_threeToseven # Original non-merged from 8/3-8/7
speed_threeToseven # Original non-merged from 8/3-8/7
elev_threeToseven # Original non-merged from 8/3-8/7
'''
# Groups we can work with to graph, etc...
'''
count_ofactivity
'''
'\ncount_ofactivity\n\n'
# Preparing original raw data slices to compare to merged data of the same slice.
# For comparing original raw dataset with merged dataset to check for
# integrity (does it look similar) of the merged dataset.
def _window_aug3_to_aug7(df):
    # Keep rows strictly after 2020-08-02 23:59:59 and strictly before 2020-08-08.
    return df[(df['DateTime'] > '2020-08-02 23:59:59') & (df['DateTime'] < '2020-08-08')]

current_threeToseven = _window_aug3_to_aug7(current)
voltage_threeToseven = _window_aug3_to_aug7(voltage)
power_threeToseven = _window_aug3_to_aug7(power)
speed_threeToseven = _window_aug3_to_aug7(speed)
elev_threeToseven = _window_aug3_to_aug7(elev)
# Side-by-side sanity check: plot each raw (original) series next to the same
# series from the merged dataset to confirm the merge preserved the signal shape.
# Original dataset.
c_o = current_threeToseven.plot(x="DateTime", y="Current")
# Merged dataset.
c_m = august_threeToseven_i.plot(y="Current")
# Original dataset.
v_o = voltage_threeToseven.plot(x="DateTime", y="Voltage")
# Merged dataset.
v_m = august_threeToseven_i.plot(y="Voltage")
# Original dataset.
p_o = power_threeToseven.plot(x="DateTime", y="Power")
# Merged dataset.
p_m = august_threeToseven_i.plot(y="Power")
# Original dataset.
s_o = speed_threeToseven.plot(x="DateTime", y="Speed")
# Merged dataset.
s_m = august_threeToseven_i.plot(y="Speed")
# Original dataset.
e_o = elev_threeToseven.plot(x="DateTime", y="Elevation")
# Merged dataset.
e_m = august_threeToseven_i.plot(y="Elevation")
# Datasets we can work with to graph, etc...
'''
august_threeToseven # Original merged set.
august_threeToseven_i # Datetime set as index. (this is the base-line table)
august_threeToseven_i_by30Sec # Downsampled to 30 seconds.
august_threeToseven_i_by30Sec_d # NaNs dropped.
august_threeToseven_i_byMin # Downsampled to minutes.
august_threeToseven_i_byMin_d # NaNs dropped.
august_threeToseven_i_byHour # Downsampled to hours.
august_threeToseven_i_byHour_d # NaNs dropped.
august_threeToseven_i_byDay # Downsampled to days.
august_threeToseven_i_n # Normalized.
august_threeToseven_i_r # Upsampled to 2 second intervals with linear interpolation.
current_threeToseven # Original non-merged from 8/3-8/7
voltage_threeToseven # Original non-merged from 8/3-8/7
power_threeToseven # Original non-merged from 8/3-8/7
speed_threeToseven # Original non-merged from 8/3-8/7
elev_threeToseven # Original non-merged from 8/3-8/7
'''
# Groups we can work with to graph, etc...
'''
count_ofactivity
'''
'\ncount_ofactivity\n\n'
from bokeh.models.tools import RangeTool
from bokeh.models import HoverTool
from bokeh.models.formatters import DatetimeTickFormatter
from bokeh.models import Div
from bokeh.plotting import figure, output_file, save
# Write the grid of linked graphs to a standalone HTML file for the website.
output_file(filename="gap_graph.html")
# Shared hover tool: snap to the nearest data point and show the timestamp
# (datetime-formatted) plus the y-value under the cursor.
hover = HoverTool(point_policy='snap_to_data',
tooltips=[
("index", "@DateTime{%Y-%m-%d %H:%M:%S}"),
("value", "$y")
],
formatters={
'@DateTime': 'datetime'}
)
# Five linked figures: p/s/v/o reuse c's tool list and x_range, so panning or
# zooming any one of them moves all of them — useful for spotting the data
# collection gaps across all sensors at once.
c = figure(x_axis_type='datetime', plot_width=500, plot_height=300, tools=[hover, 'pan', 'wheel_zoom', 'box_select', 'reset'], title="Current")
p = figure(x_axis_type='datetime', plot_width=500, plot_height=300, tools=c.tools, x_range=c.x_range, title="Power")
s = figure(x_axis_type='datetime', plot_width=500, plot_height=300, tools=c.tools, x_range=c.x_range, title="Speed")
v = figure(x_axis_type='datetime', plot_width=500, plot_height=300, tools=c.tools, x_range=c.x_range, title="Voltage")
o = figure(x_axis_type='datetime', plot_width=500, plot_height=300, tools=c.tools, x_range=c.x_range, title="Odometer")
# Current/Power/Speed/Voltage are drawn from the raw (non-merged) slices.
# NOTE(review): Odometer is drawn from the merged `august_threeToseven` —
# confirm that mixing sources here is intentional.
c.line('DateTime', y='Current', source=current_threeToseven)
p.line('DateTime', y='Power', source=power_threeToseven)
s.line('DateTime', y='Speed', source=speed_threeToseven)
v.line('DateTime', y='Voltage', source=voltage_threeToseven)
o.line('DateTime', y='Odometer', source=august_threeToseven)
# Lay out as a 2x2 grid with Odometer on its own row, then save to HTML.
a = gridplot([[c,p], [s, v], [o]])
save(a)
#show(a)
#current_threeToseven.plot_bokeh(kind='line', rangetool=True, hover.tooltips=[('$index')])
'/content/gap_graph.html'
# Standalone zoomable voltage graph, written to its own HTML file.
output_file(filename="voltage_graph_zoom.html")
# Hover tool: nearest-line policy with snap-to-data points; tooltip shows the
# timestamp (datetime-formatted) and the y-value.
hover = HoverTool(line_policy='nearest', point_policy='snap_to_data', tooltips=[
("index", "@DateTime{%Y-%m-%d %H:%M:%S}"),
("value", "$y")
],
formatters={
'@DateTime': 'datetime'}
)
v = figure(x_axis_type='datetime', plot_width=1100, plot_height=400, tools=[hover, 'pan', 'wheel_zoom', 'box_select', 'reset'], title="Voltage")
v.line('DateTime', y='Voltage', source=voltage_threeToseven)
save(v)
#show(v)
'/content/voltage_graph_zoom.html'
Basic Bokeh graphing template:
from bokeh.plotting import figure, output_file, output_notebook, show
from bokeh.layouts import gridplot
# Minimal Bokeh demo: one figure with pan/reset/save tools.
demo = figure(width=300, height=300, tools="pan,reset,save")
# Scatter four translucent circles as sample data.
demo.circle([1, 2.5, 3, 2], [2, 3, 1, 1.5], radius=0.3, alpha=0.5)
# Render inline in the notebook rather than to an HTML file.
#output_file("foo.html")
output_notebook()
# Display the figure.
show(demo)
# Dual-axis plot from the NaN-dropped hourly downsample.
# NOTE(review): the title says "Power vs Grade" but the series plotted are
# Current (navy, attached to the extra "grade" axis) and Power (orange,
# default axis) — looks like a copy-paste leftover; confirm the intended
# series/title before changing.
p = figure(x_axis_type='datetime', plot_width=1000, plot_height=500, title="Power vs Grade")
# Secondary y-range named "grade", fixed to [-2, 2], added as a left axis below.
p.extra_y_ranges = {"grade": Range1d(start=-2, end=2)}
p.line(august_threeToseven_i_byHour_d.index, august_threeToseven_i_byHour_d.Current, line_color='navy', line_width=1, alpha=.5, y_range_name="grade")
p.line(august_threeToseven_i_byHour_d.index, august_threeToseven_i_byHour_d.Power, line_color="orange", line_width=1, alpha=.5, )
p.add_layout(LinearAxis(y_range_name="grade"), 'left')
show(p)
# Dual-axis minute-level plot: Grade (navy, on the secondary "grade" axis)
# against Angle (orange, default axis), from the NaN-dropped minute downsample.
# Fix: the title was copy-pasted as "Power vs Grade", but the series actually
# plotted are Grade and Angle.
p = figure(x_axis_type='datetime', plot_width=1000, plot_height=500, title="Grade vs Angle")
# Secondary y-range for the Grade line, fixed to [-2, 2], added as a left axis.
p.extra_y_ranges = {"grade": Range1d(start=-2, end=2)}
p.line(august_threeToseven_i_byMin_d.index, august_threeToseven_i_byMin_d.Grade, line_color='navy', line_width=1, alpha=.5, y_range_name="grade")
p.line(august_threeToseven_i_byMin_d.index, august_threeToseven_i_byMin_d.Angle, line_color="orange", line_width=1, alpha=.5)
p.add_layout(LinearAxis(y_range_name="grade"), 'left')
show(p)
Basic Pandas Bokeh template: different from Bokeh template above.
voltage_threeToseven.plot_bokeh(kind='line', x='DateTime', y='Voltage', figsize=(1100,500), rangetool=True)
import seaborn as sns
#sns.pairplot(august_threeToseven_i_byHour_d)
# Mean Power for each distinct Grade value.
grouped = august_threeToseven_i.groupby('Grade')['Power'].mean()
grouped
Grade
-4.10 -2.555390
-3.90 -27.839850
-3.86 -8.896156
-3.52 -29.703827
-3.04 36.538808
...
2.14 11.073946
2.94 21.573493
3.56 -29.260614
6.56 42.293575
6.84 37.319966
Name: Power, Length: 248, dtype: float64
# Label each sample with its weekday name so the plot can split by day.
august_threeToseven_i['Day'] = august_threeToseven_i.index.day_name()
import seaborn as sns
import matplotlib.pyplot as plt

# Round Grade to whole numbers so the x-axis has a manageable number of bins.
rounded = august_threeToseven_i.round({'Grade': 0})
# Set the figure dimensions explicitly.
fig, ax = plt.subplots(figsize=(20, 10))
# Nested boxplot: Power distribution per Grade bin, split by weekday.
sns.boxplot(data=rounded, x='Grade', y='Power', hue="Day", ax=ax)
plt.show()
count_ofactivity
| Current | Voltage | Power | Elevation | Speed | Odometer | Temperature | |
|---|---|---|---|---|---|---|---|
| DateTime | |||||||
| 2020-08-01 | 9534 | 9534 | 9534 | 9534 | 9534 | 9534 | 9534 |
| 2020-08-02 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-03 | 12850 | 12850 | 12850 | 12850 | 12850 | 12850 | 12850 |
| 2020-08-04 | 12530 | 12530 | 12530 | 12530 | 12530 | 12530 | 12530 |
| 2020-08-05 | 12649 | 12649 | 12649 | 12649 | 12649 | 12649 | 12649 |
| 2020-08-06 | 13765 | 13765 | 13765 | 13765 | 13765 | 13765 | 13765 |
| 2020-08-07 | 14199 | 14199 | 14199 | 14199 | 14199 | 14199 | 14199 |
| 2020-08-08 | 8716 | 8716 | 8716 | 8716 | 8716 | 8716 | 8716 |
| 2020-08-09 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-10 | 12930 | 12930 | 12930 | 12930 | 12930 | 12930 | 12930 |
| 2020-08-11 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-12 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-13 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-15 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-16 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-17 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2020-08-18 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |
| 2020-08-19 | 3151 | 3151 | 3151 | 3151 | 3151 | 3151 | 3151 |
| 2020-08-20 | 7725 | 7725 | 7725 | 7725 | 7725 | 7725 | 7725 |
| 2020-08-21 | 12653 | 12653 | 12653 | 12653 | 12653 | 12653 | 12653 |
| 2020-08-22 | 11193 | 11193 | 11193 | 11193 | 11193 | 11193 | 11193 |
| 2020-08-23 | 5573 | 5573 | 5573 | 5573 | 5573 | 5573 | 5573 |
| 2020-08-24 | 12659 | 12659 | 12659 | 12659 | 12659 | 12659 | 12659 |
| 2020-08-25 | 11731 | 11731 | 11731 | 11731 | 11731 | 11731 | 11731 |
| 2020-08-26 | 29 | 29 | 29 | 29 | 29 | 29 | 29 |
| 2020-08-27 | 7165 | 7165 | 7165 | 7165 | 7165 | 7165 | 7165 |
| 2020-08-28 | 6859 | 6859 | 6859 | 6859 | 6859 | 6859 | 6859 |
| 2020-08-29 | 12675 | 12675 | 12675 | 12675 | 12675 | 12675 | 12675 |
| 2020-08-30 | 1535 | 1535 | 1535 | 1535 | 1535 | 1535 | 1535 |
# Information from the original csv files. (current, voltage, power, speed, odometer).
# Manually tallied row counts for the low/zero-activity August dates;
# 'NA' marks dates whose csv files contained no rows at all.
i = ['8-2', '8-9', '8-11', '8-12', '8-13', '8-14', '8-15', '8-16', '8-17', '8-18']
# 8/12 through 8/17 share the same six-'NA' run, so build it once.
_na_run = ['NA'] * 6
d = {
    'Current':  [9187, 17543, 19860, *_na_run, 8126],
    'Voltage':  [693, 819, 2029, *_na_run, 229],
    'Power':    [9207, 17546, 19860, *_na_run, 8119],
    'Speed':    ['NA', 1, 4, *_na_run, 40],
    'Odometer': ['NA', 1, 2029, *_na_run, 2],
}
august_inactivity = pd.DataFrame(data=d, index=i)
august_inactivity
| Current | Voltage | Power | Speed | Odometer | |
|---|---|---|---|---|---|
| 8-2 | 9187 | 693 | 9207 | NA | NA |
| 8-9 | 17543 | 819 | 17546 | 1 | 1 |
| 8-11 | 19860 | 2029 | 19860 | 4 | 2029 |
| 8-12 | NA | NA | NA | NA | NA |
| 8-13 | NA | NA | NA | NA | NA |
| 8-14 | NA | NA | NA | NA | NA |
| 8-15 | NA | NA | NA | NA | NA |
| 8-16 | NA | NA | NA | NA | NA |
| 8-17 | NA | NA | NA | NA | NA |
| 8-18 | 8126 | 229 | 8119 | 40 | 2 |
Zero activity: We can see that dates 8/12 - 8/17 had no activity. There are no timestamps in the original csv files for those dates, so we can conclude that the system (charging, EV bus, ECU, or whatever) was offline. For dates 8/2, 8/9, and 8/11, the zeros are due to the merging and resampling to 'Day' frequency, which dropped unmatched timestamps and/or ignored missing resolution for resampling purposes. In other words, the 'Count Of Activity' table was made from a merged dataset that had already dropped unmatched timestamps. The same reasoning applies to 8/18, where only 3 timestamps matched up and 'survived' the resampling process.
# Output table to html for website.
html = august_inactivity.to_html()
# Write html to file; the context manager guarantees the handle is closed
# (and the buffer flushed) even if the write raises.
# NOTE(review): the filename looks misspelled ("inactiviy") — confirm before
# renaming, since the website may link to this exact name.
with open("august_inactiviy.html", "w") as file:
    file.write(html)
# To investigate voltage gaps/spikes for time period between 8/3 and 8/7.
# between() is inclusive on both ends, matching the >= / <= bounds.
voltage_gap1 = voltage[voltage['DateTime'].between('2020-08-03 21:31:30', '2020-08-04 00:41:50')]
# Count of samples in this window that read exactly 0 V.
voltage_gap1['Voltage'].value_counts()[0]
3
voltage_gap1
| DateTime | Voltage | |
|---|---|---|
| 29494 | 2020-08-03 21:31:30 | 535 |
| 29495 | 2020-08-03 21:31:37 | 535 |
| 29496 | 2020-08-03 21:31:38 | 0 |
| 29497 | 2020-08-04 00:39:23 | 0 |
| 29498 | 2020-08-04 00:39:24 | 525 |
| 29499 | 2020-08-04 00:41:28 | 525 |
| 29500 | 2020-08-04 00:41:29 | 0 |
# Output table to html for website.
html = voltage_gap1.to_html()
# Write html to file; the context manager guarantees the handle is closed
# even if the write raises (the original left the file open on failure).
with open("voltage_gap1.html", "w") as file:
    file.write(html)
# Second gap window on 8/3; between() is inclusive on both ends,
# matching the original >= / <= bounds.
voltage_gap2 = voltage[voltage['DateTime'].between('2020-08-03 21:29:52', '2020-08-03 21:30:41')]
voltage_gap2
| DateTime | Voltage | |
|---|---|---|
| 29442 | 2020-08-03 21:29:52 | 546 |
| 29443 | 2020-08-03 21:29:53 | 547 |
| 29444 | 2020-08-03 21:29:58 | 547 |
| 29445 | 2020-08-03 21:29:59 | 548 |
| 29446 | 2020-08-03 21:30:05 | 548 |
| 29447 | 2020-08-03 21:30:06 | 547 |
| 29448 | 2020-08-03 21:30:07 | 548 |
| 29449 | 2020-08-03 21:30:08 | 548 |
| 29450 | 2020-08-03 21:30:09 | 547 |
| 29451 | 2020-08-03 21:30:12 | 547 |
| 29452 | 2020-08-03 21:30:13 | 548 |
| 29453 | 2020-08-03 21:30:14 | 548 |
| 29454 | 2020-08-03 21:30:15 | 549 |
| 29455 | 2020-08-03 21:30:16 | 550 |
| 29456 | 2020-08-03 21:30:17 | 551 |
| 29457 | 2020-08-03 21:30:18 | 552 |
| 29458 | 2020-08-03 21:30:19 | 553 |
| 29459 | 2020-08-03 21:30:20 | 553 |
| 29460 | 2020-08-03 21:30:21 | 554 |
| 29461 | 2020-08-03 21:30:22 | 555 |
| 29462 | 2020-08-03 21:30:24 | 555 |
| 29463 | 2020-08-03 21:30:25 | 556 |
| 29464 | 2020-08-03 21:30:26 | 556 |
| 29465 | 2020-08-03 21:30:27 | 557 |
| 29466 | 2020-08-03 21:30:28 | 557 |
| 29467 | 2020-08-03 21:30:29 | 558 |
| 29468 | 2020-08-03 21:30:31 | 558 |
| 29469 | 2020-08-03 21:30:32 | 559 |
| 29470 | 2020-08-03 21:30:33 | 558 |
| 29471 | 2020-08-03 21:30:35 | 558 |
| 29472 | 2020-08-03 21:30:36 | 556 |
| 29473 | 2020-08-03 21:30:37 | 551 |
| 29474 | 2020-08-03 21:30:38 | 549 |
| 29475 | 2020-08-03 21:30:39 | 547 |
| 29476 | 2020-08-03 21:30:40 | 546 |
| 29477 | 2020-08-03 21:30:41 | 545 |
# Output table to html for website.
html = voltage_gap2.to_html()
# Write html to file; the context manager guarantees the handle is closed
# even if the write raises (the original left the file open on failure).
with open("voltage_gap2.html", "w") as file:
    file.write(html)
# To subdivide the GPS dataset into 7 chunks for export for further processing.
# NOTE(review): the block below is deliberately disabled — it is a bare string
# literal (a no-op), kept as a record of how the GPS column was split into
# 100k-row chunks. Un-quote it to re-run the split.
"""
gps1 = gps['GPS'].iloc[:100000]
gps2 = gps['GPS'].iloc[100000:200000]
gps3 = gps['GPS'].iloc[200000:300000]
gps4 = gps['GPS'].iloc[300000:400000]
gps5 = gps['GPS'].iloc[400000:500000]
gps6 = gps['GPS'].iloc[500000:600000]
gps7 = gps['GPS'].iloc[600000:624302]
"""
"\ngps1 = gps['GPS'].iloc[:100000]\ngps2 = gps['GPS'].iloc[100000:200000]\ngps3 = gps['GPS'].iloc[200000:300000]\ngps4 = gps['GPS'].iloc[300000:400000]\ngps5 = gps['GPS'].iloc[400000:500000]\ngps6 = gps['GPS'].iloc[500000:600000]\ngps7 = gps['GPS'].iloc[600000:624302]\n"
# Export GPS dataset without index or header info. (For use at GPSvisualizer.com)
# NOTE(review): like the chunking block above, this is disabled — a bare string
# literal kept as a record of the one-time export. Un-quote it to re-export.
"""
gps1.to_csv('gps1.csv', index=False, header=False)
gps2.to_csv('gps2.csv', index=False, header=False)
gps3.to_csv('gps3.csv', index=False, header=False)
gps4.to_csv('gps4.csv', index=False, header=False)
gps5.to_csv('gps5.csv', index=False, header=False)
gps6.to_csv('gps6.csv', index=False, header=False)
gps7.to_csv('gps7.csv', index=False, header=False)
"""
"\ngps1.to_csv('gps1.csv', index=False, header=False)\ngps2.to_csv('gps2.csv', index=False, header=False)\ngps3.to_csv('gps3.csv', index=False, header=False)\ngps4.to_csv('gps4.csv', index=False, header=False)\ngps5.to_csv('gps5.csv', index=False, header=False)\ngps6.to_csv('gps6.csv', index=False, header=False)\ngps7.to_csv('gps7.csv', index=False, header=False)\n"